This R code includes the analyses and figures of the CPS1000 manuscript, written by Kevin D. Hofer and Junyan Lu.

1 Setup

library(knitr)
options(digits=3, width=80)
opts_chunk$set(echo=TRUE,tidy=FALSE,include=TRUE)

1.1 Load libraries

library(ggpmisc)
suppressMessages(library(RColorBrewer))
suppressMessages(library(ggrepel))
suppressMessages(library(cowplot))
suppressMessages(library(gridExtra))
suppressMessages(library(drc))
suppressMessages(library(tidyverse))
detach("package:dplyr") 
suppressMessages(library(dplyr))
library("survminer")
library("survival")
library("maxstat")
library("patchwork")
library("glmnet")
library("corrplot")
library("writexl")
library("ggplot2")
library("flextable")
library("ggpubr")
library("ComplexHeatmap")
library("circlize")
library("grid")
library("RColorBrewer")
library("ggplotify")
library("magrittr")
library("Rtsne")
library("paletteer")
library("ggtext")
library("ggh4x")
library("plotrix")
library("scales")
library("corrplot")
library("gridGraphics")
library("draw")
library("stringr")
library("ggsurvfit")
library("BloodCancerMultiOmics2017")
library("gtable")
library("statebins")
library("DESeq2")
library("apeglm")
library("fgsea")
library("msigdbr")
library("biomaRt")
library("org.Hs.eg.db")
library("ggtext")
library("flextable")
library("data.table")
library("readxl")
library("tidygraph")
library("ggraph")
library("stats")


#maintain function of specific R packages
rename <- dplyr::rename
count  <- dplyr::count
filter <- dplyr::filter

1.2 Define figure themes

theme_figures <- theme_classic()+
  theme(plot.title = element_text(size=8, hjust=0.5, face="bold"),
        axis.text = element_text(color="black", size=8), 
        axis.title = element_text(size=8), 
        axis.line = element_line(size=0.5), 
        legend.title = element_text(size=8, face="bold"), 
        legend.text = element_text(size=8), 
        legend.key.size = unit(0.5, 'cm'))

theme_figures_angle45_x <- theme_classic()+
  theme(plot.title=element_text(size=8, hjust=0.5, face="bold"),
        axis.text.x = element_text(color="black", angle = 45, hjust=1, vjust=1, size=8), 
        axis.text.y = element_text(size=8), 
        axis.title = element_text(size=8), 
        legend.title = element_text(size=8, face="bold"), 
        legend.text = element_text(size=8), 
        axis.line = element_line(size=0.5), 
        legend.key.size = unit(0.5, 'cm'))

theme_figures_facet <- theme_bw() +   
  theme(strip.text.y = element_text(size = 0), 
        strip.text.x = element_text(size=8),
        strip.background.y = element_rect(color=NA),
        axis.text = element_text(size=8, color="black"),
        axis.title = element_text(size=8, color="black"),
        panel.spacing = unit(0.15, "lines"), 
        legend.position = "none",
        panel.border = element_rect(color = "black", linewidth = 0.75, fill = NA),
        panel.grid = element_blank(),
        strip.background = element_rect(color = "black", linewidth = 0.75, fill = "grey90"),
        plot.title = element_text(hjust=0.5, size=8, face="bold"))

theme_figures_facet_angle60_x <- theme_bw() +   
  theme(strip.text.y = element_text(size = 0), 
        strip.text.x = element_text(size=8),
        strip.background.y = element_rect(color=NA),
        axis.title = element_text(size=8), 
        axis.text.y = element_text(size =8, color="black"),
        axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1, size=8, color = "black"),
        panel.spacing = unit(0.15, "lines"), 
        legend.title = element_text(size=8, face="bold"), 
        legend.text=element_text(size=8), 
        legend.key.size = unit(0.5, 'cm'),
        panel.border = element_rect(color = "black", linewidth = 0.75, fill = NA),
        strip.background = element_rect(color = "black", linewidth = 0.75, fill = "grey90"),
        plot.title = element_text(hjust=0.5, size=8, face="bold"), 
        strip.text = element_text(size = 8))

theme_figures_facet_angle45_x  <- theme_figures_facet +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

theme_figures_facet_angle45_x_legend <- theme_figures_facet_angle45_x +
  theme(legend.position = "right",
        legend.title = element_text(size=8, face="bold"), 
        legend.text=element_text(size=8), 
        legend.key.size = unit(0.5, 'cm'))
  
theme_figures_border <- theme_bw() +
  theme(axis.text = element_text(color="black", size=8),
        axis.title = element_text(size=8), 
        legend.title = element_text(size=8, face="bold"), 
        legend.text=element_text(size=8), 
        legend.key.size = unit(0.5, 'cm'),
        plot.title = element_text(hjust=0.5, size=8, face="bold"), 
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.border = element_rect(color = 'black', 
                                    fill = NA, 
                                    size = 1))

Set colors

#diagnosis
colors_diag <- setNames(c("#1F78B4", "#B2DF8A", "#A6CEE3", "grey", "#E31A1C", "#FB9A99", "#FF7F00", "#CAB2D6"), c("MCL", "CLL", "T-PLL", "other", "AML", "T-ALL", "B-ALL", "B-PLL"))

colors_diag_IGHV <- setNames(c("#1F78B4", "#B2DF8A", "#33A02C", "grey80", "#E31A1C", "#FB9A99", "#FF7F00", "#CAB2D6", "#A6CEE3"), c("MCL", "U-CLL", "M-CLL", "other", "AML", "T-ALL", "B-ALL", "B-PLL", "T-PLL"))

#pathways
colors_pathways_mod <- setNames(c("lightgrey", "#BC80BD", "#FCCDE5", "#B3DE69", "#FDB462", "#80B1D3", "#FB8072", "#BEBADA", "#FFFFB3", "#8DD3C7"), c("other", "AKT/mTOR", "DNA damage response", "Chemotherapy" ,  "MAPK" , "B-cell receptor", "PI3K", "Stress pathway", "Bromodomain/PLK", "JAK/STAT"))

colors_pathways_mod2 <- setNames(c("lightgrey", "#FCCDE5", "#B3DE69", "#FDB462", "#80B1D3", "#FB8072", "#BEBADA", "#FFFFB3", "#8DD3C7", "#A6CEE3", "#6DBD57", "#DE9E83", "#BC80BD"), c("other", "DNA damage response", "Chemotherapy" ,  "MAPK" , "B-cell receptor", "PI3K", "Stress pathway", "Bromodomain/PLK", "JAK/STAT", "Apoptosis", "Cell cycle control", "Histone methylation", "PI3K/AKT/mTOR"))

colors_cor_heatmap <- setNames(c("#BC80BD", "#FDB462", "#80B1D3", "#FB8072", "#BEBADA"), c("AKT/mTOR", "MEK/ERK" , "B-cell receptor", "PI3K", "Stress pathway"))

#mode of action
colors_drug_type <- c("Chemo" = "#9a6a3d", "Kinase" = "#6A3D9A", "Other" = "#3d9a6a")
labels_drug_type <- c("Chemo" = "Chemotherapy", "Kinase" ="Kinase inhibitor", "Other" = "Other")

#genetics
colors_ddx3x <- setNames(c("#E78AC3", "grey60"), c(1, 0))

colors_del11q <- setNames(c("#E5C494", "grey60"), c(1, 0))

#longitudinal analysis
colors_treatment <- setNames(c("black", "salmon"), c("sample1_value", "sample2_value"))

#cox regression models
colors_cox <- setNames(c("#2171b5", "#cb181d"), c("TTT", "OS"))

colors_ibr_viab <- setNames(c("#CA054D", "#B96D40", "#A4D4B4", "#3B1C32"), 
                            c("U-CLL, low ibrutinib viability", "U-CLL, high ibrutinib viability", "M-CLL, low ibrutinib viability", "M-CLL, high ibrutinib viability"))

1.3 Define functions

Functions for analyses

#function for t-test
tTest <- function(val, fac) {
  res <- t.test(val ~ fac, var.equal = TRUE)
  data.frame(p = res$p.value, 
             mean1 = res$estimate[[1]],
             mean2 = res$estimate[[2]],
             diff = res$estimate[[2]] - res$estimate[[1]])
}

#helper function for mean viability boxplots
create_boxplot <- function(data, diagnosis_filter, colors, title) {
  #order drugs by median viability
  drug_order <- data |> 
    filter(diagnosis == diagnosis_filter) |> 
    group_by(drug) |> 
    summarise(median = median(mean_viab)) |> 
    arrange(desc(median)) |> 
    pull(drug)
  
  #filter and reorder data
  plot_data <- data |> 
    filter(diagnosis == diagnosis_filter) |> 
    mutate(drug = factor(drug, levels = drug_order))
  
  #create plot
  plot_data |> 
    filter(diagnosis == diagnosis_filter) |> 
    ggplot(aes(x=mean_viab, y=drug, color=diagnosis))+ 
    geom_boxplot(alpha=0.8, outlier.shape=NA) +
    geom_point(alpha=0.2, size=1)+
    scale_x_continuous(name ="Mean viability", breaks=seq(0,1.2, 0.2), limits=c(0,1.2))+
    theme_bw() +
    theme(
      axis.text = element_text(color = "black", size = 8),
      axis.title = element_text(size = 8),
      plot.title = element_text(size = 8, hjust = 0.5, face = "bold"),
      legend.position = "none",
      panel.border = element_rect(color = "black", linewidth = 1, fill = NA)) +
    labs(y = "", x = "Mean viability", title = title) +
    scale_color_manual(values = colors_diag_IGHV)
}

#function to remove highly correlated values and keep track of the removing (binomial regression)
removeCorrelated <- function(x, cutoff = 0.6, method = "pearson", keep = NULL) {
  if (!is.null(keep)) {
    #if specified some feature to keep, then reorder the matrix, to make sure they are not collapsed
    posNow <- grepl(paste(keep, collapse = "|"), colnames(x))
    posNew <- rev(c(colnames(x)[posNow],colnames(x)[!posNow]))
    x <- x[,posNew]
  }
  #input is a feature matrix, features are in columns
  if (method == "binary") {
    #use binary similarity if input is a binary matrix,
    #maybe also usefull is the input is a sparse matrix
    simiMat <- 1 - as.matrix(dist(t(x), method = "binary"))
  } else if (method == "pearson") {
    #otherwise, using pearson correlation
    simiMat <- cor(x)
  } else if (method == "euclidean") {
    simiMat <- 1 - as.matrix(dist(t(x), method = "euclidean"))
  } else if (method == "cosine") {
    # cosine similarity maybe prefered for sparse matrix
    cosineSimi <- function(x){
      x%*%t(x)/(sqrt(rowSums(x^2) %*% t(rowSums(x^2))))
    }
    simiMat <- cosineSimi(t(x))
  } else if (method == "canberra") {
    simiMat <- 1 - as.matrix(dist(t(x), method = "canberra"))/nrow(x)
  }
  
  #generate reduced matrix
  simiMat.ori <- simiMat
  simiMat[upper.tri(simiMat)] <- 0
  diag(simiMat) <- 0
  x.re <- x[,!apply(simiMat, 2, function(n) any(abs(n) >= cutoff))]
  
  #a matrix keeping track of the removed features
  mapReduce <- simiMat.ori
  diag(mapReduce) <- 0
  mapList <- lapply(colnames(x.re), function(i) colnames(mapReduce)[mapReduce[i,]>=cutoff])
  names(mapList) <- colnames(x.re)
  
  return(list(reduced = x.re, 
              mapReduce = mapList))
}

#function for multi-variant binomial regression
runGlm.bin <- function(X, y, method = "ridge", repeats=20, folds = 3) {
  modelList <- list()
  lambdaList <- c()
  aucList <- c()
  coefMat  <- matrix(NA, ncol(X), repeats)
  rownames(coefMat) <- colnames(X)
  if (method == "lasso"){
    alpha = 1
  } else if (method == "ridge") {
    alpha = 0
  }
  
  for (i in seq(repeats)) {
      #balanced sampling
      vecFold <- mltools::folds(y,nfolds = folds, stratified = TRUE)
      res <- cv.glmnet(X,y, type.measure = "auc",
                        foldid = vecFold, alpha = alpha, standardize = FALSE,
                       intercept = TRUE, family = "binomial")
      lambdaList <- c(lambdaList, res$lambda.1se)
      aucList <- c(aucList, res$cvm[res$lambda == res$lambda.1se])
      modelList[[i]] <- res
      
      coefMat[,i] <- coef(res, s = "lambda.1se")[-1]
  }
  list(modelList = modelList, lambdaList = lambdaList, aucList = aucList, coefMat = coefMat)
}


#sigmoid curve fitting
testF <- function(m0, m1) {
  Fstat <- ((sum(residuals(m0)^2) - sum(residuals(m1)^2))/(m0$df.residual-m1$df.residual))/(sum(residuals(m1)^2)/m1$df.residual)
  1 - pf(Fstat, m0$df.residual-m1$df.residual, m1$df.residual)
}

calcAUC <- function(m1, minConc =0, maxConc=15, n=100) {
  concSeq <- data.frame(conc = seq(minConc, maxConc, length.out = n))
  valueConc <- data.frame(viab = predict(m1, concSeq),
                          conc = concSeq[,1])
  valueConc <- valueConc[order(valueConc$conc),]
  areaTotal <- 0
  for (i in seq(nrow(valueConc)-1)) {
    areaEach <- (valueConc$viab[i] + valueConc$viab[i+1])*(valueConc$conc[i+1]-valueConc$conc[i])*0.5
    areaTotal <- areaTotal + areaEach
  }
  areaTotal/(valueConc[nrow(valueConc),]$conc-valueConc[1,]$conc)
}

calcParm <- function(df_input) {
  df <- as.data.frame(df_input)
  df$conc <- as.numeric(df$conc)
  df$viab <- as.numeric(df$viab)

  formula_obj <- viab ~ conc

  out <- tryCatch({
    m1 <- eval(substitute(
      drm(formula_obj,
          data = df,
          fct = LL2.3u(),
          robust = "tukey",
          lowerl = c(0, 0, NA),
          upperl = c(4, 1, NA)),
      list(formula_obj = formula_obj, df = df)
    ))

    fitRes <- m1$coefficients
    m0 <- lm(viab ~ 1, data = df)
    pred_vals <- predict(m1)
    r2 <- cor(pred_vals, df$viab, use = "pairwise.complete.obs")^2
    auc <- calcAUC(m1)

    tibble(
      HS = fitRes[[1]],
      Einf = fitRes[[2]],
      IC50 = fitRes[[3]],
      pF = testF(m0, m1),
      R2 = r2,
      AUC = auc
    )
  }, error = function(e) {
    message("Fit failed for a group: ", e$message)
    tibble(
      HS = NA_real_,
      Einf = NA_real_,
      IC50 = NA_real_,
      pF = NA_real_,
      R2 = NA_real_,
      AUC = NA_real_
    )
  })

  return(out)
}

#function to clean and combine multi-omics data
generateData <- function(inclSet, onlyCombine = FALSE, censor = NULL, robust = FALSE) {
  
  dataScale <- function(x, censor = NULL, robust = FALSE) {
    #function to scale different variables
    if (length(unique(na.omit(x))) == 2){
      #a binary variable, change to -0.5 and 0.5 for 1 and 2
      x - 0.5
    } else if (length(unique(na.omit(x))) == 3) {
      #catagorical varialbe with 3 levels, methylation_cluster, change to -0.5,0,0.5
      (x - 1)/2
    } else {
      if (robust) {
        #continuous variable, centered by median and divied by 2*mad
        mScore <- (x-median(x,na.rm=TRUE))/(1.4826*mad(x,na.rm=TRUE))
        if (!is.null(censor)) {
          mScore[mScore > censor] <- censor
          mScore[mScore < -censor] <- -censor
        }
        mScore/2
      } else {
        mScore <- (x-mean(x,na.rm=TRUE))/(sd(x,na.rm=TRUE))
        if (!is.null(censor)) {
          mScore[mScore > censor] <- censor
          mScore[mScore < -censor] <- -censor
        }
        mScore/2
      }
    }
  }
  
  
  allResponse <- list() #list for storing response vector (drug viability)
  allExplain <- list() #list for storing explain matrices (multi-omics)
  
  for (drug in colnames(inclSet$drugs)) {
    y <- inclSet$drugs[,drug]
    
    #get overlapped samples for each dataset 
    overSample <- names(y)
    
    for (eachSet in inclSet) {
      overSample <- intersect(overSample,rownames(eachSet))
    }
    
    y <- dataScale(y[overSample], censor = censor, robust = robust)
    allResponse[[drug]] <- y
  }
  
  #generate explanatory variable table for each seahorse measurement
  expTab <- list()
  
  if ("gen" %in% names(inclSet)) {
    geneTab <- inclSet$gen[overSample,]
    #at least 3 mutated sample
    geneTab <- geneTab[, colSums(geneTab) >= 3]
    vecName <- sprintf("genetic(%s)", ncol(geneTab))
    expTab[[vecName]] <- apply(geneTab,2,dataScale)
  }
  
  if ("RNA" %in% names(inclSet)){
    
    #for PCA
    rnaPCA <- inclSet$RNA[overSample, ]
    colnames(rnaPCA) <- paste0("con.expression",colnames(rnaPCA), sep = "")
    expTab[["expression(20)"]] <- apply(rnaPCA,2,dataScale, censor = censor, robust = robust)
    
  }
  
  if ("meth" %in% names(inclSet)){
    methPCA <- inclSet$meth[overSample,]
    colnames(methPCA) <- paste("con.methylation",colnames(methPCA),sep = "")
    expTab[["methylation(20)"]] <- apply(methPCA[,1:20],2,dataScale, censor = censor, robust = robust)
  }
  
  if ("IGHV" %in% names(inclSet)) {
    IGHVtab <- inclSet$IGHV[overSample,,drop=FALSE]
    expTab[["IGHV(1)"]] <- apply(IGHVtab,2,dataScale)
  }
  
  if ("Mcluster" %in% names(inclSet)) {
    methTab <- inclSet$Mcluster[overSample,,drop=FALSE]
    colnames(methTab) <- "methylation cluster"
    expTab[["methCluster(1)"]] <- apply(methTab,2,dataScale)
  }
  
  if ("pretreated" %in% names(inclSet)){
    preTab <- inclSet$pretreated[overSample,,drop=FALSE]
    expTab[["pretreated(1)"]] <- apply(preTab,2,dataScale)
  }
  
  comboTab <- c()
  
  for (eachSet in names(expTab)){
    comboTab <- cbind(comboTab, expTab[[eachSet]])
  }
  vecName <- sprintf("all(%s)", ncol(comboTab))
  expTab[[vecName]] <- comboTab
  
  allExplain <- expTab
  
  if (onlyCombine) {
    #only return combined results, for feature selection
    allExplain <-expTab[length(expTab)]
  }
  
  return(list(allResponse=allResponse, allExplain=allExplain))
  
}

#function for multi-variant regression
runGlm <- function(X, y, method = "ridge", repeats=20, folds = 3) {
  modelList <- list()
  lambdaList <- c()
  varExplain <- c()
  coefMat <- matrix(NA, ncol(X), repeats)
  rownames(coefMat) <- colnames(X)
  
  if (method == "lasso"){
    alpha = 1
  } else if (method == "ridge") {
    alpha = 0
  }
  
  for (i in seq(repeats)) {
    if (ncol(X) > 2) {
      res <- cv.glmnet(X,y, type.measure = "mse", family="gaussian", 
                       nfolds = folds, alpha = alpha, standardize = FALSE)
      lambdaList <- c(lambdaList, res$lambda.min)
      modelList[[i]] <- res
      
      coefModel <- coef(res, s = "lambda.min")[-1] #remove intercept row
      coefMat[,i] <- coefModel
      
      #calculate variance explained
      y.pred <- predict(res, s = "lambda.min", newx = X)
      varExp <- cor(as.vector(y),as.vector(y.pred))^2
      varExplain[i] <- ifelse(is.na(varExp), 0, varExp) 
      
   
    } else {
      fitlm<-lm(y~., data.frame(X))
      varExp <- summary(fitlm)$r.squared
      varExplain <- c(varExplain, varExp)
      
    }
    
  }
  list(modelList = modelList, lambdaList = lambdaList, varExplain = varExplain, coefMat = coefMat)
}


#function to calculate pair-wise mean viability differences
create_mean_differences <- function(treatments_df, viab_df) {
  # Get drug columns
  drug_cols <- setdiff(names(viab_df), c("patientID", "sampleID", "sampleDate"))
  
  # Get sample1 values
  sample1_long <- treatments_df %>%
    left_join(viab_df, by = c("sample1" = "sampleID")) %>%
    dplyr::select(patientID.x, pair, sample1, sample2, status, therapy, all_of(drug_cols)) %>%
    rename(patientID = patientID.x) %>%
    pivot_longer(cols = all_of(drug_cols), names_to = "drug", values_to = "sample1_value")
  
  # Get sample2 values
  sample2_long <- treatments_df %>%
    left_join(viab_df, by = c("sample2" = "sampleID")) %>%
    dplyr::select(patientID.x, pair, all_of(drug_cols)) %>%
    rename(patientID = patientID.x) %>%
    pivot_longer(cols = all_of(drug_cols), names_to = "drug", values_to = "sample2_value")
  
  # Combine and calculate differences
  differences_long <- sample1_long %>%
    left_join(sample2_long, by = c("patientID", "pair", "drug")) %>%
    mutate(
      difference = sample2_value - sample1_value,
      fold_change = sample2_value / sample1_value,
      percent_change = ((sample2_value - sample1_value) / sample1_value) * 100
    )
  
  return(differences_long)
}


#function to calculate pair-wise single concentration viability differences
create_conc_differences <- function(treatments_df, viab_df) {
  # Get drug columns
  drug_cols <- setdiff(names(viab_df), c("patientID", "sampleID", "sampleDate", "conc_index"))
  
  # Get sample1 values
  sample1_long <- treatments_df %>%
    left_join(viab_df, by = c("sample1" = "sampleID")) %>%
    dplyr::select(patientID.x, pair, sample1, sample2, status, therapy, conc_index, all_of(drug_cols)) %>%
    rename(patientID = patientID.x) %>%
    pivot_longer(cols = all_of(drug_cols), names_to = "drug", values_to = "sample1_value")
  
  # Get sample2 values
  sample2_long <- treatments_df %>%
    left_join(viab_df, by = c("sample2" = "sampleID")) %>%
    dplyr::select(patientID.x, pair, conc_index, all_of(drug_cols)) %>%
    rename(patientID = patientID.x) %>%
    pivot_longer(cols = all_of(drug_cols), names_to = "drug", values_to = "sample2_value")
  
  # Combine and calculate differences
  differences_long <- sample1_long %>%
    left_join(sample2_long, by = c("patientID", "pair", "drug", "conc_index")) %>%
    mutate(
      difference = sample2_value - sample1_value,
      fold_change = sample2_value / sample1_value,
      percent_change = ((sample2_value - sample1_value) / sample1_value) * 100
    )
  
  return(differences_long)
}

#function for univariate cox regression
com_uni <- function(response, time, endpoint, scale =FALSE) {  
  
  if (scale) {
    #calculate z-score
    response <- (response - mean(response, na.rm = TRUE))/sd(response, na.rm=TRUE)
  }
  
  tryCatch({
    surv <- coxph(Surv(time, endpoint) ~ response) 
    resTab <- tibble(p = summary(surv)[[7]][,5], 
                     HR = summary(surv)[[7]][,2], 
                     lower = summary(surv)[[8]][,3], 
                     higher = summary(surv)[[8]][,4],
                      concordance = summary(surv)$concordance[1],
                      concordance_se = summary(surv)$concordance[2])
  }, error = function(err) {
    resTab <- tibble(p = NA, 
                     HR = NA, 
                     lower = NA, 
                     higher = NA,
                       concordance = NA,
                       concordance_se = NA)
  })
  
}

#function for multivariate cox regression
com <- function(response, ighv, TP53_del17p, age, time, endpoint, scale =FALSE) {  
  
  if (scale) {
    #calculate z-score
    response <- (response - mean(response, na.rm = TRUE))/sd(response, na.rm=TRUE)
    age <- (age - mean(age, na.rm = TRUE))/sd(age, na.rm=TRUE)
  }
  
  tryCatch({
    surv <- coxph(Surv(time, endpoint) ~ response+ighv+TP53_del17p+age) 
    resTab <- tibble(variable = c("response", "ighv", "TP53_del17p", "age"),
                     p = summary(surv)[[7]][,5], 
                     HR = summary(surv)[[7]][,2], 
                     lower = summary(surv)[[8]][,3], 
                     higher = summary(surv)[[8]][,4])
  }, error = function(err) {
    resTab <- tibble(variable = c("response", "ighv", "TP53_del17p", "age"),
                     p = NA, 
                     HR = NA, 
                     lower = NA, 
                     higher = NA)
  })
  
}

Functions for plots and tables

#function for network plot of associations
plotSigNet <- function(meanTab) {
  absmax <- function(x) { x[which.max(abs(x))]}
  sumTab <- filter(meanTab, coef != 0) %>% 
    separate(drug, into = c("name","conc"),sep = "_") %>% 
    group_by(name, disease) %>%
    summarise(coef = absmax(coef)) %>%
    ungroup()
  
  nodeTab <- bind_rows(select(sumTab, name, coef) %>% mutate(type = "drug"),
                       tibble(name = unique(sumTab$disease), 
                              coef =15,
                              type = "disease")) %>%
    mutate(id = seq(nrow(.))) %>%
    mutate(direction = ifelse(type == "drug", ifelse(coef >0,"resistant","sensitive"),
                              name))  # Use disease name as the direction for disease nodes
  
  # Add the drug colors to the color palette
  all_colors <- c(colors_diag_IGHV, resistant = "pink", sensitive = "lightblue")
  
  edgeTab <- sumTab %>% mutate(from = seq(nrow(.)), to = nodeTab[match(disease,nodeTab$name),]$id) %>%
    select(from, to)
  
  tg <- tbl_graph(nodes = nodeTab, edges = edgeTab, directed = FALSE)
  
  ggraph(tg, layout = "igraph", algorithm ="nicely") +  
    geom_edge_link(color = "grey90") +
    geom_node_point(aes(size=abs(coef), shape = type, color = direction)) +
    geom_node_text(aes(label = name), size=3) +
    scale_size_continuous(range = c(1,20), guide = FALSE) +
    scale_shape_manual(values = c(disease= 16, drug = 16), guide = FALSE) +
    scale_color_manual(values = all_colors,
                       breaks = c("resistant", "sensitive"),
                       name = "Direction") +
    theme_void()+
    theme(legend.title = element_text(face="bold", size=8), legend.text = element_text(size=8))
}

#function for the lasso heatmap plot
lassoPlot <- function(lassoOut, cleanData, freqCut = 1, coefCut = 0.01, setNumber = "last") {
  plotList <- list()
  if (setNumber == "last") {
    setNumber <- length(lassoOut[[1]])
  } else {
    setNumber <- setNumber
  }
  
  for (seaName in names(lassoOut)) {
    #for the barplot on the left of the heatmap
    barValue <- rowMeans(lassoOut[[seaName]][[setNumber]]$coefMat)
    freqValue <- rowMeans(abs(sign(lassoOut[[seaName]][[setNumber]]$coefMat)))
    barValue <- barValue[abs(barValue) >= coefCut & freqValue >= freqCut] # a certain threshold
    barValue <- barValue[order(barValue)]
    if(length(barValue) == 0) {
      plotList[[seaName]] <- NA
      next
    }
    
    #for the heatmap and scatter plot below the heatmap
    allData <- cleanData$allExplain[[setNumber]]
    seaValue <- cleanData$allResponse[[seaName]]*2 #back to Z-score
    
    tabValue <- allData[, names(barValue),drop=FALSE]
    ord <- order(seaValue)
    seaValue <- seaValue[ord]
    tabValue <- tabValue[ord, ,drop=FALSE]
    sampleIDs <- rownames(tabValue)
    tabValue <- as.tibble(tabValue)
    
    #change scaled binary back to catagorical
    for (eachCol in colnames(tabValue)) {
      if (strsplit(eachCol, split = "[.]")[[1]][1] != "con") {
        tabValue[[eachCol]] <- as.integer(as.factor(tabValue[[eachCol]]))
      }
      else {
        tabValue[[eachCol]] <- tabValue[[eachCol]]*2 #back to Z-score
      }
    }
    
    tabValue$Sample <- sampleIDs
    #Mark different rows for different scaling in heatmap
    matValue <- gather(tabValue, key = "Var",value = "Value", -Sample)
    matValue$Type <- "mut"
    
    #For continuious value
    matValue$Type[grep("con.",matValue$Var)] <- "con"
    
    #for methylation_cluster
    matValue$Type[grep("cluster",matValue$Var)] <- "meth"
    
    #change the scale of the value, let them do not overlap with each other
    matValue[matValue$Type == "mut",]$Value = matValue[matValue$Type == "mut",]$Value + 10
    matValue[matValue$Type == "meth",]$Value = matValue[matValue$Type == "meth",]$Value + 20
    
    
    #color scale for viability
    idx <- matValue$Type == "con"
    
    myCol <- colorRampPalette(c('dark red','white','dark blue'), 
                              space = "Lab")
    if (sum(idx) != 0) {
      matValue[idx,]$Value = round(matValue[idx,]$Value,digits = 2)
      minViab <- min(matValue[idx,]$Value)
      maxViab <- max(matValue[idx,]$Value)
      limViab <- max(c(abs(minViab), abs(maxViab)))
      scaleSeq1 <- round(seq(-limViab, limViab,0.01), digits=2)
      color4viab <- setNames(myCol(length(scaleSeq1+1)), nm=scaleSeq1)
    } else {
      scaleSeq1 <- round(seq(0,1,0.01), digits=2)
      color4viab <- setNames(myCol(length(scaleSeq1+1)), nm=scaleSeq1)
    }
    
    
    
    #change continues measurement to discrete measurement
    matValue$Value <- factor(matValue$Value,levels = sort(unique(matValue$Value)))
    
    #change order of heatmap
    names(barValue) <-  gsub("con.", "", names(barValue))
    matValue$Var <- gsub("con.","",matValue$Var)
    matValue$Var <- factor(matValue$Var, levels = names(barValue))
    matValue$Sample <- factor(matValue$Sample, levels = names(seaValue))
    #plot the heatmap
    p1 <- ggplot(matValue, aes(x=Sample, y=Var)) + geom_tile(aes(fill=Value), color = "gray") + 
      theme_bw() + scale_y_discrete(expand=c(0,0)) + 
      theme(axis.text.y=element_text(hjust=0, size=8, color="black"), axis.text.x=element_blank(), 
            axis.ticks=element_blank(), panel.border=element_rect(colour="gainsboro"),  
            plot.title=element_text(size=8, color="black", face="bold"), 
            panel.background=element_blank(), panel.grid.major=element_blank(), 
            panel.grid.minor=element_blank()) + 
      xlab("patients") + ylab("") + scale_fill_manual(name="Mutated", values=c(color4viab, `11`="gray96", `12`='black', `21`='#DEEBF7', `22`='#9ECAE1',`23` = '#3182BD'),guide=FALSE) + ggtitle(seaName)
    
    #Plot the bar plot on the left of the heatmap
    barDF = data.frame(barValue, nm=factor(names(barValue),levels=names(barValue)))
    
    p2 <- ggplot(data=barDF, aes(x=nm, y=barValue)) + 
      geom_bar(stat="identity", fill="lightblue", position = "identity", width=.8, alpha=0.75) + 
      theme_bw() + geom_hline(yintercept=0, size=0.5) + scale_x_discrete(expand=c(0,0.5)) + 
      scale_y_continuous(expand=c(0,0)) + coord_flip(ylim=c(min(barValue),max(barValue))) + 
      theme(panel.grid.major=element_blank(), panel.background=element_blank(), axis.ticks.y = element_blank(),
            panel.grid.minor = element_blank(), axis.text=element_text(size=8, color="black"), panel.border=element_blank(), axis.line.x = element_line(color="black", size=0.5))+
      xlab("") + ylab("") + geom_vline(xintercept=c(0.5), color="black")
    
    #Plot the scatter plot under the heatmap
    
    # scatterplot below
    scatterDF = data.frame(X=factor(names(seaValue), levels=names(seaValue)), Y=seaValue)
    
    p3 <- ggplot(scatterDF, aes(x=X, y=Y)) + geom_point(shape=21, fill="grey80", colour="black", size=1.2) + theme_bw() + 
      theme(panel.grid.minor=element_blank(), panel.grid.major.x=element_blank(), axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.text.y=element_text(size=8, color="black"), panel.border=element_rect(colour="dimgrey", size=0.1), panel.background=element_rect(fill="gray96"))
    
    #Scale bar for continuous variable
    
    Vgg = ggplot(data=data.frame(x=1, y=as.numeric(names(color4viab))), aes(x=x, y=y, color=y)) + geom_point() + 
      scale_color_gradientn(name="Z-score", colours =color4viab) + theme(legend.title=element_text(size=12), legend.text=element_text(size=10))
    
    #Assemble all the plots togehter
    
    # construct the gtable
    wdths = c(1.5, 0.2, 1.3*ncol(matValue), 0.7, 1)
    hghts = c(0.1, 0.0010*nrow(matValue), 0.16, 0.5)
    gt = gtable(widths=unit(wdths, "in"), heights=unit(hghts, "in"))
    
    ## make grobs
    gg1 = ggplotGrob(p1)
    gg2 = ggplotGrob(p2)
    gg3 = ggplotGrob(p3)
    gg4 = ggplotGrob(Vgg)
    
    ## fill in the gtable
    gt = gtable_add_grob(gt, gtable_filter(gg2, "panel"), 2, 1) # add barplot
    gt = gtable_add_grob(gt, gtable_filter(gg1, "panel"), 2, 3) # add heatmap
    gt = gtable_add_grob(gt, gtable_filter(gg1, "title"), 1, 3) #add title to plot
    gt = gtable_add_grob(gt, gtable_filter(gg3, "panel"), 4, 3) # add scatterplot
    gt = gtable_add_grob(gt, gtable_filter(gg2, "axis-b"), 3, 1) # y axis for barplot
    gt = gtable_add_grob(gt, gtable_filter(gg3, "axis-l"), 4, 2) # y axis for scatter plot
    gt = gtable_add_grob(gt, gtable_filter(gg1, "axis-l"), 2, 4) # variable names
    #gt = gtable_add_grob(gt, gtable_filter(gg4, "guide-box"), 2, 5) # scale bar for continous variables
    
    
    #plot
    #grid.draw(gt)
    plotList[[seaName]] <- gt
  }
  return(plotList)
}


#function for supplementary tables
stab_theme <- function(ft) {
  ft |>
    theme_alafoli() |>
    autofit() |> 
    fontsize(size = 8, part = "header") |> 
    fontsize(size = 8, part = "body") |> 
    padding(padding.top = 3, padding.bottom = 3, part = "all") |> 
    align(align = "left", part = "body") |>  
    align(align = "left", part = "header") |>
    bold(part = "header") |>
    width(j = 1, width = 1.25) |>
    width(j = 2, width = 3.5) |>
    width(j = 3, width = 0.75) |>
    line_spacing(space = 1, part = "all") |> 
    color(color = "black", part = "all")
}

2 Load data

File path

# Modify according to directory

#filepath <-"../../"
filepath <- "~/Dropbox/Medizin/Forschung/Aktuelle Forschungsprojekte/CLL CPS1000/figures/github/"

Load processed CPS1000 data

load(paste0(filepath,"data/CPS1000_data.RData"))

Description of datasets:
- pheno1000: DMSO-normalized ex vivo viability measurements

  • screen: selection of the first sample of each patient that passed quality requirements (used for the main analysis)

  • screen_allSamples: as above, but including all samples

  • drugs: drugs used in the screen

  • patients: patients and samples investigated in the main analysis

  • metadata: annotation of genetic aberrations in CLL

  • metadata_leukemia: annotation of genetic aberrations in leukemia

  • timeline: in vivo treatments

  • samples_long: samples analyzed in the longitudinal screen

  • treatments: treatment contexts for the longitudinal analysis of paired samples

  • survival: data on overall survival and time to treatment

  • age: age at sampling

  • ddx3x (table, cps1000, literature): information on DDX3X mutations

3 Quality check

3.1 For CLL only

pheno1000.cll <- filter(pheno1000, diagnosis == "CLL")

Define inner negative controls

#annotate edge wells
pheno1000.cll <- mutate(pheno1000.cll, 
                    edge = ifelse(substr(well,1,1) %in% c("A","B","O","P") | substr(well,2,3) %in% c("01","02","23","24"),
                                             TRUE,FALSE))

Define the quality filter.

sdCut <- 0.1  #upper-limit of the variance of inner negative controls
atpCut <- 1e+05 #lower-limit of the basal ATP count

Calculate the variance and median of inner negative controls

innerNeg <- filter(pheno1000.cll, !edge, Drug == "DMSO") %>%
  group_by(sampleID, patientID, batch) %>% 
  summarise(basalATP = median(val, na.rm = TRUE), sdVal = sd(normVal, na.rm = TRUE))
## `summarise()` has grouped output by 'sampleID', 'patientID'. You can override
## using the `.groups` argument.

Number of filtered samples

sampleRemove <- filter(innerNeg, basalATP <= atpCut | sdVal >= sdCut)

#numer of samples being removed
nrow(sampleRemove)
## [1] 14

Plot

cll_qa <- mutate(innerNeg, discarded = ifelse(sampleID %in% sampleRemove$sampleID, "yes","no"))
cll_qa_plot <- ggplot(cll_qa, aes(x=log10(basalATP), y = sdVal)) +
  geom_point(aes(color= discarded), size =1, alpha=0.6) +
  geom_hline(yintercept = sdCut, color= "grey60", linetype = "dashed") +
  geom_vline(xintercept = log10(atpCut), color= "grey60", linetype = "dashed") +
  scale_color_manual(values = c(yes = "red", no = "black")) +
  theme_figures+
  theme(legend.position = "none", plot.margin=margin(r=1,l=1,t=1,b=1,unit="cm"))+
  labs(x=expression(log[10]*"(baseline ATP count)"),, y="Standard deviation of controls", title = "CLL samples")

3.1.1 For non-CLL

pheno1000.all <- filter(pheno1000, diagnosis != "CLL" & !diagnosis %in% c("", NA)) %>%
  mutate(diagnosis = droplevels(diagnosis))

Inner negative controls values (basal ATP) vs cell count

#annotate edge wells
pheno1000.all <- mutate(pheno1000.all, 
                    edge = ifelse(substr(well,1,1) %in% c("A","B","O","P") | substr(well,2,3) %in% c("01","02","23","24"),
                                             TRUE,FALSE))

innerNeg <- filter(pheno1000.all, Drug == "DMSO", !edge)

medianNeg <- group_by(innerNeg, sampleID, diagnosis) %>% 
  summarise(medVal = median(val), cellCount = mean(cellCount)) %>%
  mutate(ifLowCount = ifelse(cellCount < 10^6,"low count","normal")) %>%
  filter(!is.na(ifLowCount))
## `summarise()` has grouped output by 'sampleID'. You can override using the
## `.groups` argument.

Calculate the mean and variance of inner-negative control wells and lowest concentration wells

medVar <- filter(pheno1000.all, !edge) %>%
  mutate(type = ifelse(Drug == "DMSO", "neg", 
                       ifelse(concIndex == 5, "lowConc","other"))) %>%
  group_by(sampleID, patientID, type) %>% summarise(sdVal = sd(normVal)) %>%
  spread(key="type",value="sdVal") %>%
  mutate(labelText = sprintf("%s (%s)", patientID, sampleID))
## `summarise()` has grouped output by 'sampleID', 'patientID'. You can override
## using the `.groups` argument.
medVar <- arrange(medVar, desc(lowConc))
basalATP <- group_by(innerNeg, sampleID,patientID, diagnosis) %>% 
  summarise(meanVal = mean(normVal), cellCount = mean(cellCount), 
            basalATP = mean(val)) %>%
  ungroup()
## `summarise()` has grouped output by 'sampleID', 'patientID'. You can override
## using the `.groups` argument.
medVar <- left_join(medVar, basalATP, by = c("sampleID","patientID")) %>%
  ungroup()

Define the quality filter.

sdCut <- 0.2  #upper-limit of the variance of lowest concentration wells
atpCut <- 1e+04 #lower-limit of the basal ATP count

Number of filtered samples

sampleRemove.nonCLL <- filter(medVar, basalATP <= atpCut | lowConc >= sdCut) %>%
  distinct(sampleID, patientID)

#numer of samples being removed
nrow(sampleRemove.nonCLL)
## [1] 11

Plot

non_cll_qa <- mutate(medVar, discarded = ifelse(sampleID %in% sampleRemove.nonCLL$sampleID, "yes","no"))
non_cll_qa_plot <- ggplot(non_cll_qa, aes(x=log10(basalATP), y = lowConc)) +
  geom_point(aes(color= discarded), size =1, alpha=0.6) +
  geom_hline(yintercept = sdCut, color= "grey60", linetype = "dashed") +
  geom_vline(xintercept = log10(atpCut), color= "grey60", linetype = "dashed") +
  scale_color_manual(values = c(yes = "red", no = "black")) +
  theme_figures+
  theme(legend.position = "none", plot.margin=margin(r=1,l=1,t=1,b=1,unit="cm"))+
  labs(x=expression(log[10]*"(baseline ATP count)"),, y="Standard deviation of controls", title = "non-CLL samples")

Multipanel figure

line1 <- plot_grid(cll_qa_plot, non_cll_qa_plot, ncol = 2, labels = c("A", "B"), label_size = 14)

FigS10 <- plot_grid(line1, NULL, nrow = 2, rel_heights = c(0.2,0.8)) & theme(plot.margin=margin(r=1,l=1,t=1,b=1,unit="cm"))

ggsave(path=(paste0(filepath,"figures")), filename = "FigS10.svg", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "FigS10.pdf", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "FigS10.png", width=42, height=59.4, units = "cm", dpi=600)

4 Drugs

4.1 Drug overview (Fig. 2A)

#prepare data
drugs <- drugs |> as.data.frame()

drugs$Drug <- drugs$Drug |> 
  gsub("\\.", "-", x = _) |>
  gsub("D4476", "D 4476", x = _) |>
  gsub("IRAK1/4 Inhibitor I", "IRAK-1/4 Inhibitor I", x = _) |> 
  gsub("SB-203580", "SB 203580", x = _)

drugs$Drug <- sort(drugs$Drug) 
drug_list <- drugs |> 
  dplyr::select(Drug, Target, Pathway) #add supplier

Plot

Fig2a <- ggplot(drugs, aes(x = factor(Pathway))) + 
geom_bar(aes(fill = Type)) + 
scale_fill_brewer(palette = "PuRd",  direction = 1) +
labs(y="Number of drugs", x="", fill="Drug type") +
theme_figures_angle45_x +
scale_y_continuous(expand = c(0.01,0), breaks=c(1,2,3,4,5,6,7,8)) +
scale_x_discrete(expand = c(0.025,0)) +
theme(plot.margin = margin(t=1, b=1, r=1, l=1, unit="cm"))
Fig2a

4.2 Drug list

#drug overview
drugs$Drug
##  [1] "10058-F4"             "A-1210477"            "A-674563"            
##  [4] "Afatinib"             "AMI-1"                "AZD1208"             
##  [7] "AZD6738"              "AZD8055"              "BGJ398"              
## [10] "BML-277"              "BMS-509744"           "Ceritinib"           
## [13] "Cisplatin"            "Cobimetinib"          "Cytarabine"          
## [16] "D 4476"               "Dabrafenib"           "Dasatinib"           
## [19] "Doxorubicine"         "Duvelisib"            "EPZ-6438"            
## [22] "Fludarabine"          "Foretinib"            "Ganetespib"          
## [25] "Ibrutinib"            "Idelalisib"           "Imatinib"            
## [28] "IRAK-1/4 Inhibitor I" "JNK-IN-8"             "KU-55933"            
## [31] "Menin-MLL Inhibitor"  "Methotrexate"         "MI-503"              
## [34] "Midostaurin"          "MK-2206"              "Motolimod"           
## [37] "Nutlin-3a"            "Olaparib"             "Onalespib"           
## [40] "ONO-4059"             "OTX015"               "Palbociclib"         
## [43] "Panobinostat"         "PF-3758309"           "PRT062607"           
## [46] "Quizartinib"          "Rapamycin"            "RO5963"              
## [49] "Ruxolitinib"          "SB 203580"            "SCH772984"           
## [52] "Selinexor"            "Selisistat"           "SGC 0946"            
## [55] "Sunitinib"            "Tofacitinib"          "Tozasertib"          
## [58] "Trametinib"           "TW-37"                "Venetoclax"          
## [61] "Vismodegib"           "Volasertib"           "ZM 336372"
#create table
stab3 <- flextable(drug_list) # |> stab_theme()

ft_grob <- gen_grob(stab3, fit = "width", scaling = "min", hjust = 0)

STab3 <- ggplot() +
  annotation_custom(ft_grob) +
  theme_void() +
  theme(plot.margin = margin(t = 1, r = 1, b = 1, l = 1, unit = "cm"))

ggsave(width=21, height=29.7, units = "cm", path=(paste0(filepath,"tables")), filename = "STab3_drugs_and_targets.svg")

5 Patient characteristics

5.1 Diagnoses (Fig. 2C)

#prepare data
patients <- patients |> 
  filter(diagnosis != "") |> 
  as.data.frame()

patients$cohort <- patients$cohort |> 
  str_replace("IC50", "IC50 + CPS1000") |>
  as.factor()
  
Fig2c <- ggplot(patients, aes(x = forcats::fct_infreq(diagnosis))) +
  geom_bar(aes(fill = cohort)) + 
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5, size=2.5) +
  scale_fill_brewer(palette = "Greens", direction =-1)+
  labs(y="Number of patients", x="", fill="Drug screen")+
  theme_figures_angle45_x +
  scale_y_continuous(expand = c(0.02,0), breaks=seq(0,300,50), limits = c(0,300))+
  scale_x_discrete(expand = c(0.04,0)) +
  theme(plot.margin = margin(t=1, b=1, r=1, l=1, unit="cm"))
Fig2c

ggsave(path=(paste0(filepath,"figures")), filename = "Fig2c_disease_distribution.svg")
## Saving 8 x 4 in image

5.2 Longitudinal data (Fig. 2D)

#prepare data
treatments$sample1 <- gsub("-[1234]$", "", treatments$sample1)
treatments$sample2 <- gsub("-[1234]$", "", treatments$sample2)

paste0("number of patients: ",length(unique(treatments$patientID)))
## [1] "number of patients: 110"
#merge data
sample_date <- survival[,1:3]
merged_table <- merge(treatments, sample_date, by.x="sample1", by.y="sampleID", all.x=TRUE) |> 
  merge(patients[,c("patientID", "diagnosis")], by="patientID") |> 
  dplyr::rename(sampleDate1 = sampleDate)

#add date for 2. sampling and calculate time interval
merged_table <- merge(sample_date, merged_table, by.x="sampleID", by.y="sample2", ) |> 
  dplyr::rename(sample2 = sampleID, sampleDate2 = sampleDate) |> 
  dplyr::select(c(patientID, sample1, sample2, sampleDate1, sampleDate2, diagnosis)) |> 
  mutate(diff = sampleDate2-sampleDate1)

#plot
merged_table$diff <- as.numeric(merged_table$diff)
summary(merged_table$diff)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       7     224     515     578     884    1757
Fig2d <- ggplot(merged_table, aes(x="", y=diff)) +
  geom_boxplot(color="black", linewidth=0.5, outliers = FALSE, width=0.4) +
  geom_jitter(width = 0.1, alpha = 0.75, size=1, color="#74c476")+ 
  theme_figures+
  labs(x = "", y = "Days between samples")+
  theme(axis.ticks.x = element_blank())+
  scale_y_continuous(expand = c(0.02,0), breaks=c(seq(0,1750,500))) +
  theme(plot.margin = margin(t=1, r=1, l=1, b=1, unit="cm"))
Fig2d

ggsave(path=(paste0(filepath,"figures")), filename = "Fig2d_interval_samples.svg")
## Saving 4 x 4 in image

5.3 Genetic alterations in the CLL cohort (Fig. 2E)

#prepare data
#remove del5IgH
oncoprint_data <- merge(patients, metadata, by.x="patientID", by.y="Patient.ID") |> 
  filter(diagnosis.x == "CLL") |> 
  dplyr::select(!del5IgH)

#remove the first two columns (IGHV.status and Methylation_Cluster)
matrix_data <- as.matrix(oncoprint_data[, -c(1:13)])
rownames(matrix_data) <- oncoprint_data$patientID 
genetic_variations <- t(matrix_data)

#exclude aberrations with less than 8 events
genetic_variations <- data.matrix(genetic_variations)

genetic_variations_num <- matrix(as.numeric(genetic_variations), ncol = 275)
## Warning in matrix(as.numeric(genetic_variations), ncol = 275): NAs introduced
## by coercion
rownames(genetic_variations_num) <- rownames(genetic_variations)
colnames(genetic_variations_num) <- colnames(genetic_variations)

genetic_variations_num <- as.data.frame(genetic_variations_num)
genetic_variations_num$count <- rowSums(!(is.na(genetic_variations_num) | genetic_variations_num == 0)) 

genetic_variations_num <- genetic_variations_num |> 
  filter(count >7)
  
#re-adjust matrix für heatmap
genetic_variations_num[genetic_variations_num==0]<-""
genetic_variations_num[is.na(genetic_variations_num)]<-""

genetic_variations_red <- dplyr::select(genetic_variations_num, !count)
genetic_variations_red <- as.matrix(genetic_variations_red)

#annotations
#annotation for IGHV status
ighv_anno <- oncoprint_data[,"IGHV.status"]
ighv_anno <- ighv_anno %>% replace(is.na(.), "unknown")
ighv_colors <- colorRampPalette(brewer.pal(9, "Set1"))(length(unique(ighv_anno)))
annotation_colors <- setNames(ighv_colors, unique(ighv_anno))

#annotation for Methylation
meth_anno <- oncoprint_data[,"Methylation_Cluster"]
meth_anno <- factor(meth_anno, levels = c("HP", "IP","LP", "unknown"))
meth_anno <- meth_anno %>% replace(is.na(.), "unknown")
meth_colors <- colorRampPalette(brewer.pal(8, "Set2"))(length(unique(meth_anno)))
annotation_colors_meth <- setNames(meth_colors, unique(meth_anno))

#annotation for center
center_anno <- oncoprint_data[,"project"]
center_anno <- factor(center_anno, levels = c("HD", "HD-DNA", "H68", "MIR34a", "FB", "NOV", "MDACC", "MUN", "HCL_HD_Andrulis", "OX", "BARC", "ZCH_2"))
center_colors <- colorRampPalette(brewer.pal(3, "Paired"))(length(unique(center_anno)))

#change center label
center_anno <- fct_recode(center_anno,
"Zürich" = "ZCH_2",
"Barcelona" = "BARC",
"Heidelberg" = "HD")
annotation_colors_center <- setNames(center_colors, unique(center_anno))

#change IGHV label
ighv_anno <- fct_recode(ighv_anno,
"mutated" = "M",
"unmutated" = "U")
annotation_colors <- setNames(ighv_colors, unique(ighv_anno))

ha <- HeatmapAnnotation(Center = center_anno, IGHV = ighv_anno, Methylation = meth_anno, annotation_name_gp = gpar(fontsize = 8), col = list(Center =  annotation_colors_center, IGHV = annotation_colors, Methylation = annotation_colors_meth), annotation_legend_param = list(
    Center = list(
      title_gp = gpar(fontsize = 8, fontface = "bold"),
      labels_gp = gpar(fontsize = 8)
    ),
    IGHV = list(
      title_gp = gpar(fontsize = 8, fontface = "bold"),
      labels_gp = gpar(fontsize = 8)
    ),
    Methylation = list(
      title_gp = gpar(fontsize = 8, fontface = "bold"),
      labels_gp = gpar(fontsize = 8)
    )))

heatmap_legend_param = list(title="Alteration", at = c("1"), title_gp = gpar(fontsize = 8), labels_gp = gpar(fontsize = 8))

#create the oncoprint
oncoprint <- oncoPrint(
    genetic_variations_red,
    alter_fun = list(
      background = function(x, y, w, h) {
        grid.rect(x, y, w, h, gp = gpar(fill = "#F8F8F8", col = "white"))
      },
      "1" = function(x, y, w, h) grid.rect(x, y, w, h, gp = gpar(fill = "black", col = "white")), col = "white"),
    col = c("1" = "black"), na_col = "white",
    remove_empty_rows = FALSE,
    remove_empty_columns = FALSE,
    row_names_gp = gpar(fontsize = 8),
    column_names_gp = gpar(fontsize = 8), 
    row_names_side = "left", pct_side = "right", pct_digits = 1,
    right_annotation = rowAnnotation(row_barplot = anno_oncoprint_barplot(c("1"), border = FALSE, height = unit(4, "cm"), axis_param = list(side = "bottom", labels_rot = 0))), 
    pct_gp = gpar(fontsize = 8), top_annotation = ha, heatmap_legend_param = heatmap_legend_param)
## All mutation types: 1.
## `alter_fun` is assumed vectorizable. If it does not generate correct
## plot, please set `alter_fun_is_vectorized = FALSE` in `oncoPrint()`.
draw(oncoprint, heatmap_legend_side = "left", annotation_legend_side = "right", show_heatmap_legend = FALSE)

Fig2e <- grid.grabExpr(draw(oncoprint, heatmap_legend_side = "left", annotation_legend_side = "right", show_heatmap_legend = FALSE))

ggsave(Fig2e, path=(paste0(filepath,"figures")), filename = "Fig2e_CLL_genetic_landscape.svg")
## Saving 8 x 6 in image

6 Drug response

6.1 Comparison with previous data sets (Fig. 2B)

## Warning: Removed 790 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning: Removed 1161 rows containing non-finite outside the scale range
## (`stat_density()`).

## Saving 8 x 3 in image
## Warning: Removed 790 rows containing non-finite outside the scale range
## (`stat_density()`).
## Removed 1161 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning: Removed 790 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning: Removed 1161 rows containing non-finite outside the scale range
## (`stat_density()`).

6.2 Drug response patterns (Fig. 3A)

#cap supravital viability results (cut-off <1.2)
screen_long$viability[screen_long$viability>1.2] <- 1.2
screen_long$viability[screen_long$viability<0] <- 0

screen_allSamples_long$viability[screen_allSamples_long$viability>1.2] <- 1.2
screen_allSamples_long$viability[screen_allSamples_long$viability<0] <- 0

#show diseases separately with at least 10 samples
pat_nr_per_disease <- patients |> 
  group_by(diagnosis) |> 
  summarize(n_pat = n()) |> 
  arrange(desc(n_pat))

diagnoses_show <- pat_nr_per_disease |> 
  filter(n_pat >=10) |> 
  pull(diagnosis)

df_tsne <- patients[,c("patientID", "diagnosis")] |> 
merge(screen, by="patientID") |> 
  dplyr::select(-patientID, -sampleID) |> 
  mutate(diagnosis= case_when(diagnosis %in% diagnoses_show ~ diagnosis,
                               TRUE ~ "other"))

#split dataframe in measures and subsetting
df_meas <- df_tsne[, !names(df_tsne) %in% "diagnosis"]
df_subset <- df_tsne[, names(df_tsne) %in% "diagnosis"]

#define order
order <- c("AML", "B-ALL", "B-PLL", "CLL", "MCL", "T-ALL", "T-PLL", "other")
df_subset <- factor(df_tsne$diagnosis, levels = order)

#t-sne
set.seed(12)
tsne_results <- Rtsne(df_meas, perplexity=25, check_duplicates = FALSE)

#plot
tsne_plot <- data.frame(
  x = tsne_results$Y[,1], 
  y = tsne_results$Y[,2],
  diagnosis = df_subset
)

Fig3a <- ggplot(tsne_plot) + 
  geom_point(aes(x=x, y=y, color=diagnosis), size=2, alpha=0.8) +
  theme_figures+
  labs(color="Diagnosis")+
  scale_color_manual(values=colors_diag)+
  theme(plot.margin = margin(t=2, b=2, r=1, l=1, unit="cm"))
Fig3a

ggsave(path=(paste0(filepath,"figures")), filename = "Fig3a_tsne.svg")
## Saving 6 x 6 in image

6.3 Mean viability by drug and disease

6.3.1 Overall Heatmap (Fig. 3B)

#prepare data
screen_long_topdiag <- screen_long |> 
  mutate(diagnosis = case_when(diagnosis %in% diagnoses_show ~ diagnosis, 
                               TRUE ~ "other"))

#calculate mean viability
screen_means <- screen_long_topdiag |> 
  group_by(patientID, diagnosis, drug) |> 
    summarize(mean_viab = mean(viability))
## `summarise()` has grouped output by 'patientID', 'diagnosis'. You can override
## using the `.groups` argument.
screen_means
## # A tibble: 28,665 × 4
## # Groups:   patientID, diagnosis [455]
##    patientID diagnosis drug      mean_viab
##    <chr>     <chr>     <chr>         <dbl>
##  1 P0001     CLL       10058-F4      0.998
##  2 P0001     CLL       A-1210477     0.983
##  3 P0001     CLL       A-674563      0.898
##  4 P0001     CLL       AMI-1         1.00 
##  5 P0001     CLL       AZD1208       0.871
##  6 P0001     CLL       AZD6738       0.980
##  7 P0001     CLL       AZD8055       0.912
##  8 P0001     CLL       Afatinib      0.863
##  9 P0001     CLL       BGJ398        1.04 
## 10 P0001     CLL       BML-277       1.06 
## # ℹ 28,655 more rows
#merge with metadata for IGHV status in CLL, drop CLL without IGHV annotation
ighv <- metadata[, names(metadata) %in% c("Patient.ID", "IGHV.status")]

screen_means_ighv <- merge(screen_means, ighv, by.x = "patientID", by.y= "Patient.ID") |>     
  mutate(diagnosis = case_when(IGHV.status == "U" ~ "U-CLL",
                        IGHV.status == "M" ~ "M-CLL",
                        TRUE ~ diagnosis)) |> 
  filter (diagnosis != "CLL") |> 
  dplyr::select(-IGHV.status) 

#prepare for heatmap
df <- screen_means_ighv |> 
  dplyr::select(-diagnosis) |> 
  pivot_wider(names_from = patientID, values_from = mean_viab)

df_use <- df |> 
  dplyr::select(-drug) |> 
  colnames()

df_subset <- df |> 
  dplyr::select(-drug)

#diagnoses
df_diagnoses <- screen_means_ighv |> 
  filter(drug == "10058-F4") |> 
  dplyr::select(patientID, diagnosis)

#add pathways
df_pathway <- drugs[, names(drugs) %in% c("Drug", "Pathway_mod")]

df_combined <- merge(df_pathway, df, , by.x = "Drug", by.y = "drug")

#order pathways and diagnoses
df_combined$Pathway_mod <- factor(df_combined$Pathway_mod,
    levels = c("AKT/mTOR", "B-cell receptor", "Bromodomain/PLK", "Chemotherapy", 
               "DNA damage response", "JAK/STAT", "MAPK", "PI3K", "Stress pathway", "other"))
               
df_diagnoses$diagnosis <- factor(df_diagnoses$diagnosis,
    levels = c("AML", "B-ALL", "B-PLL", "M-CLL", "MCL", "T-ALL", "T-PLL", "U-CLL", "other"))

#scale
x <- data.frame(df_subset)
y <- data.frame(t(scale(t(x))))

#create matrix
rownames(y) = df$drug
colnames(y) = colnames(df_subset)
z <- as.matrix(y)

#annotations
color_fun_reversed <- colorRamp2(c(4, median(z), -4), c("red", "white", "blue"))

ha = HeatmapAnnotation(
  Diagnosis = df_diagnoses$diagnosis, 
  annotation_name_gp = gpar(fontsize = 8), 
  col = list(Diagnosis = colors_diag_IGHV),
  annotation_legend_param = list(
    Diagnosis = list(
      title_gp = gpar(fontsize = 8, fontface = "bold"),
      labels_gp = gpar(fontsize = 8),
      nrow = 2
    )
  )
)

pway <- as.data.frame(df_combined[,1:2])
pway_ordered = pway[match(rownames(z), pway$Drug), ]

ha2 = rowAnnotation(
  Pathway = pway_ordered$Pathway_mod, 
  annotation_name_gp = gpar(fontsize = 8), 
  col = list(Pathway = colors_pathways_mod),
  annotation_legend_param = list(
    Pathway = list(
      title_gp = gpar(fontsize = 8, fontface = "bold"),
      labels_gp = gpar(fontsize = 8),
      nrow = 2
    )
  )
)

#heatmap
h1 <- Heatmap(z, name = "Z-score", show_row_names = TRUE, show_column_names=FALSE, row_names_gp = gpar(fontsize = 8), width = ncol(z)*unit(0.25, "mm"), height = nrow(z)*unit(3.5, "mm"), col = color_fun_reversed, top_annotation = ha, left_annotation = ha2, show_column_dend = FALSE, row_km = 4, row_title = NULL, 
show_parent_dend_line = FALSE, heatmap_legend_param = list(direction = "horizontal", title_gp = gpar(fontsize = 8, fontface = "bold"), labels_gp = gpar(fontsize = 8)))

set.seed(1234)

#set spacing to legend
ht_opt(HEATMAP_LEGEND_PADDING = unit(1.5, "cm"))

Fig3b <- grid.grabExpr(draw(h1, merge_legends = TRUE, heatmap_legend_side = "bottom"))

# Reset options
ht_opt(RESET = TRUE)  

#save?

6.3.2 Drug-specific heatmaps (Fig. S2)

6.3.3 Overview of drug effects (Fig. 3C)

#create diagnosis-specific means
screen_mean_diag <- screen_long |> 
  group_by(diagnosis, drug) |> 
  summarize(mean_diag = mean(viability)) |> 
  filter(diagnosis %in% diagnoses_show) |> 
  pivot_wider(names_from = diagnosis, values_from = mean_diag)
## `summarise()` has grouped output by 'diagnosis'. You can override using the
## `.groups` argument.
#create heatmap
w <- screen_mean_diag[, names(screen_mean_diag) != "drug"]
rownames(w) = screen_mean_diag$drug
## Warning: Setting row names on a tibble is deprecated.
w <- as.matrix(t(w))

set.seed(123)
ha = rowAnnotation(foo = anno_text(rownames(w), gp = gpar(fontsize=8), just="right", offset=1))
## Warning: `offset` is deprecated, use `location` instead.
column_ha = HeatmapAnnotation(foo = anno_text(colnames(w), gp = gpar(fontsize=8), just = "left", offset =0))
## Warning: `offset` is deprecated, use `location` instead.
h2 <- Heatmap(w, name = "Mean viability", left_annotation = ha, 
top_annotation = column_ha, 
show_row_names = FALSE, show_column_names=FALSE, width = ncol(z)*unit(0.45, "mm"), height = nrow(z)*unit(0.4, "mm"), show_row_dend=TRUE, row_dend_side = "left", show_column_dend=FALSE,
row_dend_width = unit(7, "mm"), heatmap_legend_param = list(title_gp = gpar(fontsize = 8, fontface = "bold"), labels_gp = gpar(fontsize = 8)))

Fig3c_top <- draw(h2, heatmap_legend_side = "right", padding = unit(c(2, 2, 0, 2), "mm"))

#extract heatmap order
clustered_col_indices <- column_order(h2)
## Warning: The heatmap has not been initialized. You might have different results
## if you repeatedly execute this function, e.g. when row_km/column_km was
## set. It is more suggested to do as `ht = draw(ht); column_order(ht)`.
clustered_col_names <- colnames(w)[clustered_col_indices]

#create patient-specific means
screen_mean_pat <- screen_long |> 
  filter(diagnosis %in% diagnoses_show) |> 
  group_by(drug, patientID) |> 
  summarize(mean_pat = mean(viability)) |> 
  merge(drugs[,c("Drug", "Category")], by.x="drug", by.y="Drug") |> 
  mutate(drug = factor(drug, levels = clustered_col_names)) |> 
  arrange(drug)
## `summarise()` has grouped output by 'drug'. You can override using the
## `.groups` argument.
#add boxplots  
Fig3c_bottom <- ggplot(screen_mean_pat, aes(x=drug, y=mean_pat, color=Category))+ 
  geom_boxplot(alpha=0.6, outlier.shape=NA) +
  geom_jitter(alpha=0.1, size=0.2, width = 0.15)+
  theme_figures+
  theme(axis.text.x = element_text(color="black", size=8, angle=90, hjust=1, vjust=0.5), 
        legend.position = "bottom",
        panel.spacing = unit(0, "mm"),
        plot.margin = margin(t=0, b=2, r=33, l=21, unit = "mm"))+
  scale_y_continuous(name ="Mean viability", breaks=seq(0,1.2, 0.2), limits=c(0,1.2)) +
  scale_x_discrete(name="")+
  scale_color_manual(values=colors_drug_type, name =  "Drug type", labels=c("Chemotherapy", "Kinase inhibitor", "Other"))
Fig3c_bottom
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

Fig3c_bottom_aligned <- ggdraw(Fig3c_bottom) +
  theme(plot.margin = margin(0, 0, 0, 0))
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
Fig3c <- plot_grid(grid.grabExpr(draw(h2, heatmap_legend_side = "right")), Fig3c_bottom_aligned, ncol=1, rel_heights = c(0.5, 0.5))

#save?

6.3.4 Disease-specific drug effects

6.3.4.1 Overview (Fig. S3 and S4)

#create plots
aml <- create_boxplot(screen_means_ighv, "AML", colors_diag_IGHV, "AML")
ball <- create_boxplot(screen_means_ighv, "B-ALL", colors_diag_IGHV, "B-ALL")
tall <- create_boxplot(screen_means_ighv, "T-ALL", colors_diag_IGHV, "T-ALL")
rowS2 <- plot_grid(aml, ball, tall, NULL, NULL, ncol = 5)

FigS2 <- plot_grid(rowS2, NULL, nrow = 2, rel_heights = c(0.5, 0.5)) &
  theme(plot.margin = margin(t=1, b=1, r=1, l=1, unit="cm"))

ggsave(path=(paste0(filepath,"figures")), filename = "FigS2.svg", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "FigS2.pdf", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "FigS2.png", width=42, height=59.4, units = "cm", dpi=600) #remove later

bpll <- create_boxplot(screen_means_ighv, "B-PLL", colors_diag_IGHV, "B-PLL")
mcll <- create_boxplot(screen_means_ighv, "M-CLL", colors_diag_IGHV, "M-CLL")
mcl <- create_boxplot(screen_means_ighv, "MCL", colors_diag_IGHV, "MCL")
ucll <- create_boxplot(screen_means_ighv, "U-CLL", colors_diag_IGHV, "U-CLL")
tpll <- create_boxplot(screen_means_ighv, "T-PLL", colors_diag_IGHV, "T-PLL")
rowS3 <- plot_grid(bpll, mcll, mcl, ucll, tpll, ncol = 5)

FigS3 <- plot_grid(rowS3, NULL, nrow = 2, rel_heights = c(0.5, 0.5)) &
  theme(plot.margin = margin(t=1, b=1, r=1, l=1, unit="cm"))

ggsave(path=(paste0(filepath,"figures")), filename = "FigS3.svg", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "FigS3.pdf", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "FigS3.png", width=42, height=59.4, units = "cm", dpi=600) #remove later

6.3.4.2 Heatmap (Fig. 3D)

#prepare data
diagnoses_show_ighv <- c(diagnoses_show, "U-CLL", "M-CLL")
diagnoses_show_ighv <- diagnoses_show_ighv[diagnoses_show_ighv != "CLL"]

viabTab.mean <- screen_means_ighv |> 
  filter(diagnosis %in% diagnoses_show_ighv)

#create p-values
allDiag <- unique(viabTab.mean$diagnosis)

pTabAll <- lapply(allDiag, function(diagBase) {
  #calculate p value matrix
  pTab <- lapply(allDiag[allDiag != diagBase], function(diagCompare) {
    testTab <- filter(viabTab.mean, diagnosis %in% c(diagBase, diagCompare))  |> 
      mutate(diagnosis = factor(diagnosis, levels = c(diagBase,diagCompare))) 
    res <- group_by(testTab, drug) |> do(tTest(.$mean_viab, .$diagnosis)) |>
      mutate(diagCompare = diagCompare, diagBase = diagBase)
    res
  }) |> bind_rows() |> ungroup() |> mutate(p.adj = p.adjust(p, method = "BH"))
}) |> bind_rows()

#prepare data table for plot 
plotTab <- pTabAll |> mutate(p = ifelse(p < 1e-12, 1e-12, p)) |>
  mutate(pSign = -log10(p)*sign(diff)) |>
  mutate(pSign = ifelse(p.adj < 0.1, pSign, 0)) |>
  ungroup()

#order drugs by their association similarity (hierarchical clustering)
pMat <- mutate(plotTab, diagPair = paste0(diagBase,"_",diagCompare)) |>
  dplyr::select(drug, pSign, diagPair) |>
  spread(key = diagPair, value = pSign) |> data.frame() |>
  column_to_rownames("drug") |> as.matrix()

#cut
set.seed(123)
nClust = 9
hcRes <- hclust(dist(pMat), method = "ward.D2")
drugOrder <- rownames(pMat)[hcRes$order]
clustMap <- cutree(hcRes,nClust)

diagOrder <- c("U-CLL", "M-CLL","MCL", "B-PLL", "T-PLL", "B-ALL", "T-ALL","AML")
plotTab <- mutate(plotTab, drug = factor(drug, levels = drugOrder), 
                  diagBase = factor(diagBase, levels= diagOrder), 
                  diagCompare = factor(diagCompare, levels = diagOrder)) |>
  arrange(drug) |> mutate(drugClust = paste0("",nClust-clustMap[as.character(drug)]+1))

#color strips in y-direction
strip <- strip_themed(background_y = elem_list_rect(fill = "white"))

#add pathway annotation
plotTab <- merge(plotTab, drugs[,c("Drug", "Pathway_mod")], by.x="drug", by.y="Drug")

#plot
main_plot <- ggplot(plotTab, aes(x=diagCompare, y = drug, fill = pSign)) + 
  geom_tile(size = 0.3, color = "white") +
  facet_grid2(rows = vars(drugClust), cols = vars(diagBase), scales = "free", space = "free_y", strip = strip) + 
  scale_fill_gradient2(
    high = "blue", mid = "white", low = "red", midpoint = 0)+
  theme_figures_facet_angle60_x+
  labs(fill = "**-log<sub>10</sub>*P*<br>with direction**") +
  theme(legend.title = ggtext::element_markdown())+
  ylab("") + xlab("")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
main_plot

#add pathway annotation
anno_plot <- ggplot(plotTab, aes(x = diagCompare, y = drug, fill = Pathway_mod)) +
  geom_tile(size = 0.3) +
  facet_grid(rows = vars(drugClust), scales = "free", space = "free_y") +
  scale_fill_manual(values = colors_pathways_mod,
                    name = "Drug Class")+
  theme_minimal() +
  theme(axis.text = element_blank(),
        axis.title = element_blank(),
        panel.grid = element_blank(),
        panel.border = element_rect(linewidth = 0.75),
        panel.background = element_blank(),
        plot.background = element_blank(),
        strip.text.y = element_blank(),
        panel.spacing = unit(0.15, "lines"), 
        legend.position = "none")

Fig3d <- ggdraw() + 
  draw_plot(main_plot, x=0,y=0,width=1,height=1)+
  draw_plot(anno_plot,x=0.8610,y=0.0713,width=0.03,height=0.901) & 
  theme(plot.margin = margin(t=1, unit="cm"))

ggsave(path=(paste0(filepath,"figures")), filename = "Fig3d_pvalue_heatmap.svg", width=42, height=30, units = "cm")

6.3.4.3 Drug ranking

### simplify code!!

#prepare data

#rank drugs
rankTab <- screen_means_ighv |> 
  group_by(patientID, drug, diagnosis) |> 
  summarise(meanViab = mean(mean_viab)) |> 
  dplyr::ungroup()
## `summarise()` has grouped output by 'patientID', 'drug'. You can override using
## the `.groups` argument.
medTab <- screen_means_ighv |> 
  group_by(drug) |> 
  summarise(medVal = median(mean_viab))
rankTab <- left_join(rankTab, medTab, by = "drug") |>  
  mutate(score = meanViab - medVal)

#make rank plot for diseases
rank_score_tab <- rankTab |> 
  group_by(patientID, diagnosis) |> 
  arrange((score)) |> 
  mutate(rank_score=row_number(), l2fc = log2(meanViab)-log2(medVal)) |> 
  ungroup()

#show ranks for AML with venetoclax and cytarabine
aml_cyt <- rank_score_tab |> 
filter(diagnosis %in% c("AML")) |> 
mutate(drug_col = ifelse(drug %in% c("Cytarabine"), drug, "other")) |>
ggplot(aes(x=rank_score, y=score, alpha=drug_col, group=patientID))+
geom_hline(yintercept = 0, color="grey70")+
geom_line(alpha=0.25, color="black")+
geom_point(aes(alpha=drug_col, fill=drug_col, color=drug_col), size=1.5, shape=21)+
scale_alpha_manual(values=c(0.75,0))+
scale_fill_manual(name="Drug", breaks=c("Cytarabine"), values=c("#E31A1C", ""))+
scale_color_manual(name="Drug", breaks=c("Cytarabine"), values=c("#E31A1C", ""))+
  labs(title="Cytarabine", y="Score", x="Drug rank")+
  theme_bw()+
  theme(axis.text = element_text(color="black", size=8),
  legend.position = "none", axis.title = element_text(size=8), plot.title = element_text(hjust=0.5, size=8),
  panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", linewidth = 1, fill = NA))

aml_ven <- rank_score_tab |> 
filter(diagnosis %in% c("AML")) |> 
mutate(drug_col = ifelse(drug %in% c("Venetoclax"), drug, "other")) |>
ggplot(aes(x=rank_score, y=score, alpha=drug_col, group=patientID))+
geom_hline(yintercept = 0, color="grey70")+
geom_line(alpha=0.25, color="black")+
geom_point(aes(alpha=drug_col, fill=drug_col, color=drug_col), size=1.5, shape=21)+
scale_alpha_manual(values=c(0, 0.75))+
scale_fill_manual(name="Drug", breaks=c("Venetoclax"), values=c("#E31A1C", ""))+
scale_color_manual(name="Drug", breaks=c("Venetoclax"), values=c("#E31A1C", ""))+
  labs(title="Venetoclax", y="Score", x="Drug rank")+
  theme_bw()+
  theme(axis.text = element_text(color="black", size=8),
  legend.position = "none", axis.title = element_text(size=8), plot.title = element_text(hjust=0.5, size=8),
  panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", linewidth = 1, fill = NA))

SFig8c <- aml_cyt + aml_ven + plot_annotation(title = "AML", theme = theme(plot.title = element_text(hjust=0.5, size=8, face="bold"))) + theme(plot.margin = margin(t=1, unit = "cm"))
SFig8c

#create drug rankings per disease
drug_ranking_diag <- rank_score_tab |> 
  group_by(drug, diagnosis) |> 
  summarize(median_score = median(score), score_sd = sd(score)) |> 
  arrange(median_score) |> 
  group_by(diagnosis) |>
  mutate(rank_median = row_number())
## `summarise()` has grouped output by 'drug'. You can override using the
## `.groups` argument.
# plot: aml
drug_order_aml <- drug_ranking_diag |> 
  filter(diagnosis == "AML") |> 
  arrange(median_score) |> 
  pull(drug)
  
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_aml)  

rank_aml <- drug_ranking_diag |> 
  filter(diagnosis == "AML") |> 
  mutate(direction = sign(median_score)) |> 
  ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
  geom_col(alpha=0.9) +
  geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd), 
  color="black", alpha=0.75, width=0.5, linewidth=0.5)+
  facet_wrap(~diagnosis) +
  labs(title="", y="Median score", x="") +
  theme_bw() +
  guides(color="none")+
  scale_fill_manual(values=colors_diag)+
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8), 
          panel.grid = element_line(colour = "white"), 
          axis.text.y = element_text(color="black", size=8), 
          axis.title.y = element_text(size=8), 
          axis.title.x = element_text(size=8), 
          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
          strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
          plot.title = element_text(hjust=0.5, size=8, face="bold"), 
          strip.text = element_text(size = 8), 
          plot.margin = margin(l = 0.5, r = 1, unit = "cm"))
rank_aml

# plot: u-cll
drug_order_ucll <- drug_ranking_diag |> 
  filter(diagnosis == "U-CLL") |> 
  arrange(median_score) |> 
  pull(drug)
  
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_ucll)  

rank_ucll <- drug_ranking_diag |> 
  filter(diagnosis == "U-CLL") |> 
  mutate(direction = sign(median_score)) |> 
  ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
  geom_col(alpha=0.9) +
  geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd), 
  color="black", alpha=0.75, width=0.5, linewidth=0.5)+
  facet_wrap(~diagnosis) +
  labs(title="", y="", x="") +
  theme_bw() +
  guides(color="none")+
  scale_fill_manual(values=colors_diag_IGHV)+
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8), 
          panel.grid = element_line(colour = "white"), 
          axis.text.y = element_text(color="black", size=8), 
          axis.title.y = element_text(size=8), 
          axis.title.x = element_text(size=8), 
          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
          strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
          plot.title = element_text(hjust=0.5, size=8, face="bold"), 
          strip.text = element_text(size = 8), 
          plot.margin = margin(l = 0.5, r = 1, unit = "cm"))    
          
# plot: m-cll
drug_order_mcll <- drug_ranking_diag |> 
  filter(diagnosis == "M-CLL") |> 
  arrange(median_score) |> 
  pull(drug)
  
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_mcll)  

rank_mcll <- drug_ranking_diag |> 
  filter(diagnosis == "M-CLL") |> 
  mutate(direction = sign(median_score)) |> 
  ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
  geom_col(alpha=0.9) +
  geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd), 
  color="black", alpha=0.75, width=0.5, linewidth=0.5)+
  facet_wrap(~diagnosis) +
  labs(title="", y="", x="") +
  theme_bw() +
  guides(color="none")+
  scale_fill_manual(values=colors_diag_IGHV)+
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8), 
          panel.grid = element_line(colour = "white"), 
          axis.text.y = element_text(color="black", size=8), 
          axis.title.y = element_text(size=8), 
          axis.title.x = element_text(size=8), 
          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
          strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
          plot.title = element_text(hjust=0.5, size=8, face="bold"), 
          strip.text = element_text(size = 8), 
          plot.margin = margin(l = 0.5, r = 1, unit = "cm"))            
          
# plot: b-all
drug_order_ball <- drug_ranking_diag |> 
  filter(diagnosis == "B-ALL") |> 
  arrange(median_score) |> 
  pull(drug)
  
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_ball)  

rank_ball <- drug_ranking_diag |> 
  filter(diagnosis == "B-ALL") |> 
  mutate(direction = sign(median_score)) |> 
  ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
  geom_col(alpha=0.9) +
  geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd), 
  color="black", alpha=0.75, width=0.5, linewidth=0.5)+
  facet_wrap(~diagnosis) +
  labs(title="", y="", x="") +
  theme_bw() +
  guides(color="none")+
  scale_fill_manual(values=colors_diag)+
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8), 
          panel.grid = element_line(colour = "white"), 
          axis.text.y = element_text(color="black", size=8), 
          axis.title.y = element_text(size=8), 
          axis.title.x = element_text(size=8), 
          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
          strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
          plot.title = element_text(hjust=0.5, size=8, face="bold"), 
          strip.text = element_text(size = 8), 
          plot.margin = margin(l = 0.5, r = 1, unit = "cm"))      
          
# plot: t-all
drug_order_tall <- drug_ranking_diag |> 
  filter(diagnosis == "T-ALL") |> 
  arrange(median_score) |> 
  pull(drug)
  
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_tall)  

rank_tall <- drug_ranking_diag |> 
  filter(diagnosis == "T-ALL") |> 
  mutate(direction = sign(median_score)) |> 
  ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
  geom_col(alpha=0.9) +
  geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd), 
  color="black", alpha=0.75, width=0.5, linewidth=0.5)+
  facet_wrap(~diagnosis) +
  labs(title="", y="Median score", x="") +
  theme_bw() +
  guides(color="none")+
  scale_fill_manual(values=colors_diag)+
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8), 
          panel.grid = element_line(colour = "white"), 
          axis.text.y = element_text(color="black", size=8), 
          axis.title.y = element_text(size=8), 
          axis.title.x = element_text(size=8), 
          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
          strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
          plot.title = element_text(hjust=0.5, size=8, face="bold"), 
          strip.text = element_text(size = 8), 
          plot.margin = margin(l = 0.5, r = 1, unit = "cm"))            

# plot: mcl
drug_order_mcl <- drug_ranking_diag |> 
  filter(diagnosis == "MCL") |> 
  arrange(median_score) |> 
  pull(drug)
  
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_mcl)  

rank_mcl <- drug_ranking_diag |> 
  filter(diagnosis == "MCL") |> 
  mutate(direction = sign(median_score)) |> 
  ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
  geom_col(alpha=0.9) +
  geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd), 
  color="black", alpha=0.75, width=0.5, linewidth=0.5)+
  facet_wrap(~diagnosis) +
  labs(title="", y="Median score", x="") +
  theme_bw() +
  guides(color="none")+
  scale_fill_manual(values=colors_diag)+
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8), 
          panel.grid = element_line(colour = "white"), 
          axis.text.y = element_text(color="black", size=8), 
          axis.title.y = element_text(size=8), 
          axis.title.x = element_text(size=8), 
          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
          strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
          plot.title = element_text(hjust=0.5, size=8, face="bold"), 
          strip.text = element_text(size = 8), 
          plot.margin = margin(l = 0.5, r = 1, unit = "cm"))  

# plot: tpll
drug_order_tpll <- drug_ranking_diag |> 
  filter(diagnosis == "T-PLL") |> 
  arrange(median_score) |> 
  pull(drug)
  
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_tpll) 

rank_tpll <- drug_ranking_diag |> 
  filter(diagnosis == "T-PLL") |> 
  mutate(direction = sign(median_score)) |> 
  ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
  geom_col(alpha=0.9) +
  geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd), 
  color="black", alpha=0.75, width=0.5, linewidth=0.5)+
  facet_wrap(~diagnosis) +
  labs(title="", y="", x="") +
  theme_bw() +
  guides(color="none")+
  scale_fill_manual(values=colors_diag)+
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8), 
          panel.grid = element_line(colour = "white"), 
          axis.text.y = element_text(color="black", size=8), 
          axis.title.y = element_text(size=8), 
          axis.title.x = element_text(size=8), 
          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
          strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
          plot.title = element_text(hjust=0.5, size=8, face="bold"), 
          strip.text = element_text(size = 8), 
          plot.margin = margin(l = 0.5, r = 1, unit = "cm"))  
          

# plot: bpll
drug_order_bpll <- drug_ranking_diag |> 
  filter(diagnosis == "B-PLL") |> 
  arrange(median_score) |> 
  pull(drug)
  
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_bpll) 

rank_bpll <- drug_ranking_diag |> 
  filter(diagnosis == "B-PLL") |> 
  mutate(direction = sign(median_score)) |> 
  ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
  geom_col(alpha=0.9) +
  geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd), 
  color="black", alpha=0.75, width=0.5, linewidth=0.5)+
  facet_wrap(~diagnosis) +
  labs(title="", y="Median score", x="") +
  theme_bw() +
  guides(color="none")+
  scale_fill_manual(values=colors_diag)+
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8), 
          panel.grid = element_line(colour = "white"), 
          axis.text.y = element_text(color="black", size=8), 
          axis.title.y = element_text(size=8), 
          axis.title.x = element_text(size=8), 
          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
          strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
          plot.title = element_text(hjust=0.5, size=8, face="bold"), 
          strip.text = element_text(size = 8), 
          plot.margin = margin(l = 0.5, r = 1, unit = "cm"))          

SFig8b <- (rank_aml+plot_spacer()+rank_ball+plot_layout(widths = c(1, -0.1 ,1)))/(rank_bpll+plot_spacer()+rank_mcll+plot_layout(widths = c(1, -0.1 ,1)))/(rank_mcl+plot_spacer()+rank_ucll+plot_layout(widths = c(1, -0.1 ,1)))/(rank_tall+plot_spacer()+rank_tpll+plot_layout(widths = c(1, -0.1 ,1))) + theme(plot.margin = margin(t=1, unit = "cm"))

#calculate global ranks
#show all diseases
global_rank_tab <- rank_score_tab |> 
group_by(patientID) |> 
mutate(global_score = sum(score)) |> 
arrange(global_score) |> 
group_by(drug) |> 
mutate(global_rank = row_number()) |> 
filter(drug == "Ibrutinib") |> 
#calculate median global rank per diagnosis
group_by(diagnosis) |> 
mutate(global_score_median_diag = median(global_score),
        global_rank_median_diag = median(global_rank))
        

#plot
diagSelected <- c("AML", "B-ALL", "T-ALL", "T-PLL", "B-PLL", "MCL", "U-CLL", "M-CLL")

global_rank_tab <- global_rank_tab |> 
  mutate(diag_col = ifelse(diagnosis %in% diagSelected, diagnosis, "other"))
  
global_rank_tab$diag_col <- factor(global_rank_tab$diag_col, levels = sort(diagSelected))

# Calculate the line for all data
reference_line <- global_rank_tab |>
  arrange(global_rank) |>
  dplyr::select(global_rank, global_score)
## Adding missing grouping variables: `diagnosis`
global_rank_tab <- global_rank_tab |> 
  filter(!is.na(diag_col)) 

score_ranking <- global_rank_tab |> 
  arrange(global_score_median_diag) |> 
  distinct(diag_col) |> 
  pull(diag_col)
  
global_rank_tab$diag_col <- factor(global_rank_tab$diag_col, levels = score_ranking)

# Now plot with facets
all_diag <- global_rank_tab |> 
  ggplot(aes(x=global_rank, y=global_score)) +
  geom_hline(data = global_rank_tab |> 
               distinct(diag_col, global_score_median_diag),
             aes(yintercept = global_score_median_diag), 
             color="grey90")+
  geom_line(data = reference_line, color="grey30", linewidth=0.5, inherit.aes=TRUE) +
  geom_hline(data = global_rank_tab |> 
               distinct(diag_col, global_score_median_diag),
             aes(yintercept = global_score_median_diag), 
             color="grey80")+
  geom_point(aes(color=diag_col), alpha=0.8, size=1.5) +
  #geom_point(aes(x=global_rank_median_diag, y=global_score_median_diag), color="grey80",size=1.5, shape=21) +
  facet_wrap(~diag_col, ncol=4) +
  scale_alpha_manual(values=c(rep(0.5, length(diagSelected)))) +
  scale_color_manual(values=colors_diag_IGHV)+
  labs(title="", y="Sum of drug scores", x="Sample rank") +
  theme_bw() +
  guides(alpha = "none", color="none")+
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 0, color="black", size=8), 
          panel.grid = element_line(colour = "white"), 
          axis.text.y = element_text(color="black", size=8), 
          axis.title.y = element_text(size=8), 
          axis.title.x = element_text(size=8), 
          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
          strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
          plot.title = element_text(hjust=0.5, size=8, face="bold"), 
          strip.text = element_text(size = 8), 
          plot.margin = margin(l = 0.5, r = 1, unit = "cm"))

SFig8a <- all_diag + theme(plot.margin = margin(t=1, l=1, r=1, unit = "cm"))

#multipanel
row3 <- plot_grid(SFig8c, NULL, ncol = 2, rel_widths = c(0.4, 0.6))

FigS4 <- plot_grid(SFig8a, SFig8b, row3, rel_heights = c(0.2, 0.65, 0.15), ncol = 1, 
                   align = "v", axis = c("l", "r"), 
                   labels=c("A", "B", "C"), label_size=14) & theme(plot.margin=margin(r=1,l=1,t=1,b=1,unit="cm"))


ggsave(path=(paste0(filepath,"figures")), filename = "FigS4.svg", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "FigS4.pdf", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "FigS4.png", width=42, height=59.4, units = "cm", dpi=600) #remove later

6.3.4.4 Signature drugs at the single concentration level (multivariate binary logistic regression)

set.seed(1118)

#prepare data: select diseases with at least 10 samples
viabMat <- screen_long |> 
  mutate(drug = paste0(drug, "_", conc_index)) |> 
  filter(diagnosis %in% diagnoses_show) %>% 
  group_by(patientID, drug, conc_index) %>% summarize(viab = mean(viability)) %>%  
  ungroup() %>% dplyr::select(-conc_index) %>%
  spread(key = patientID, value = "viab") %>% data.frame() %>%
  column_to_rownames("drug") %>% as.matrix()
## `summarise()` has grouped output by 'patientID', 'drug'. You can override using
## the `.groups` argument.
#remove features that do not show any variance
sds <- apply(viabMat, 1, sd)
viabMat <- viabMat[sds > genefilter::shorth(sds),]

#Remove highly correlated features (may lead to instability of LASSO model)
keepList <- c("Ibrutinib","Cobimetinib","Venetoclax","OTX","Nutlin")
reRes <- removeCorrelated(t(viabMat), cutoff = 0.7, method = "pearson", keep = keepList)
viabMat.re <- t(reRes$reduced)
mapReduce <- reRes$mapReduce

#network plot shows the collapsed drugs
edgeTab <- lapply(names(mapReduce), function(n1) {
  if(length(mapReduce[[n1]]) != 0) {
    tibble(from = n1, to = mapReduce[[n1]])
  }
}) %>% bind_rows()

nodeTab <- bind_rows(tibble(name = unique(edgeTab$from), type = "kept"),
                     tibble(name = unique(edgeTab$to), type = "collapsed")) %>%
  mutate(id = seq(nrow(.)))

edgeTab <- mutate(edgeTab, from = nodeTab[match(from, nodeTab$name),]$id,
                  to = nodeTab[match(to, nodeTab$name),]$id)

drugNet <- tbl_graph(nodes = nodeTab, edges = edgeTab, directed = FALSE)

net <- ggraph(drugNet, layout = "igraph", algorithm ="nicely") +
  geom_edge_link(color = "grey90") +
  geom_node_point(aes(color = type)) +
  geom_node_text(aes(label = name), repel = TRUE, size=3) +
  scale_color_manual(values = c(kept = "red", collapsed = "grey50")) +
  labs(color="Correlated drugs")+
  theme_void() +
  theme(legend.title = element_text(size=8, face="bold"), 
             legend.text = element_text(size=8)) 

FigS6c <- net + theme(plot.margin = margin(t=1, l=1, r=1, unit = "cm"))


#Prepare disease information, separate M-CLL and U-CLL
diseaseTab <- tibble(patID = colnames(viabMat), 
                     diag = metadata[match(colnames(viabMat), metadata$Patient.ID),]$diagnosis, 
                     IGHV = metadata[match(colnames(viabMat), metadata$Patient.ID),]$IGHV.status) %>%
  mutate_all(as.character) %>%
  filter(!(diag == "CLL" & is.na(IGHV))) %>% mutate(diag = ifelse(diag == "CLL", paste0(IGHV,"-",diag), diag)) %>%
  mutate(diag = factor(diag, levels = c("U-CLL","M-CLL", "AML","B-ALL","T-ALL","MCL","B-PLL","T-PLL")))

diasaesTab <- diseaseTab |> drop_na(diag)

viabMat.re <- viabMat.re[,diseaseTab$patID]

#Binominal regression (logistic regression), one disease compared to all others
#fit model

#dropNA
diseaseTab_NA <- diseaseTab[is.na(diseaseTab$diag),]
diseaseTab <- diseaseTab[!is.na(diseaseTab$diag),]

viabMat.re <- viabMat.re[,!(colnames(viabMat.re) %in% diseaseTab_NA$patID)]

diseaseTab$diag <- factor(diseaseTab$diag, levels = unique(diseaseTab$diag))

#run regression
X <- t(viabMat.re)
binRes <- lapply(unique(diseaseTab$diag), function(disease) {
  y <- ifelse(diseaseTab$diag == disease, 1,0)
  res <- runGlm.bin(X,y, method = "lasso", repeats = 50, folds = 3)
})
names(binRes) <- unique(diseaseTab$diag)

#cross validation accuracy (auc) for each disease (how accurate each disease type can be predicted by drug response)
pAuc <- lapply(names(binRes), function(n){
  tibble(auc = binRes[[n]][["aucList"]],
         disease = n)
}) %>% bind_rows() %>% group_by(disease) %>%
  summarise(meanAuc = mean(auc), sdAuc = sd(auc)) %>%
  ggplot(aes(x=disease, y = meanAuc, fill = disease)) + geom_bar(stat="identity") +
  geom_errorbar(aes(ymin = meanAuc-sdAuc, ymax=meanAuc+sdAuc ), width = 0.25) +
  scale_fill_manual(values=colors_diag_IGHV) + 
  labs(x="", y="AUC of ROC", fill="Disease") + 
  scale_y_continuous(expand = c(0.01,0))+
  theme_classic()+
  theme(axis.text = element_text(color="black", size=8), axis.title.y = element_text(size=8), 
        legend.position = "none", 
        axis.line = element_line(size = 0.5))

FigS6e <- pAuc + theme(plot.margin = margin(t=2, l=3, r=3, b=2, unit = "cm"))

#summarise and plot results
sumCoef <- function(coefMat, coefCut = 0, freqCut =1) {
  meanCoef <- rowMeans(abs(coefMat))
  freqCoef <- rowMeans(coefMat != 0)
  subMat <- coefMat[meanCoef > coefCut & freqCoef >= freqCut,,drop=FALSE]
  eachTab <- data.frame(subMat) %>%
     rownames_to_column("drug") %>% gather(key = "rep",value = "coef",-drug) %>%
     mutate(rep = gsub("X","",rep))
  return(eachTab)
} 

coefTab <- lapply(names(binRes), function(d) {
   coefMat <- binRes[[d]]$coefMat
   coefTab <- sumCoef(coefMat=coefMat, freqCut = 1, coefCut =3) %>%
     mutate(disease = d)
}) %>% bind_rows()

#average coefficient for bar plot
meanTab <- coefTab %>% group_by(drug, disease) %>% 
  summarise(meanCoef = mean(coef)) %>% 
  spread(key = disease, value = meanCoef) %>%
  mutate_at(vars(-drug),replace_na,0) %>% 
  gather(key = "disease", value = "coef", -drug)
## `summarise()` has grouped output by 'drug'. You can override using the
## `.groups` argument.
#plot coefficient (drug importance) for each disease
plotList <- lapply(unique(coefTab$disease), function(n) {
  plotTab <- filter(coefTab, disease == n) %>%
    group_by(drug) %>% summarise(meanCoef = mean(coef),
                                 sdCoef = sd(coef)) %>%
    arrange(meanCoef) %>% mutate(drug = factor(drug, levels = drug))
  ggplot(plotTab, aes(x=drug, y=meanCoef)) + geom_bar(stat= "identity") +
    geom_errorbar(aes(ymin = meanCoef - sdCoef, ymax = meanCoef +sdCoef)) +
    ggtitle(n) +
    theme_bw() + theme(axis.text.x = element_text(angle = 90, hjust=1,vjust =0.5),
                       plot.title = element_text(hjust =0.5, face = "bold")) 
})
gridExtra::grid.arrange(grobs = plotList, ncol=1)

#plot mean coefficient bar plot
barPlot <- ggplot(meanTab, aes(x=reorder(drug, drug), y = coef, fill = disease)) + 
  geom_bar(stat = "identity", position = "stack") + 
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_fill_manual(values = colors_diag_IGHV) + theme_bw() +
  labs(x="", y="Coefficient", fill="Disease")+
  theme_figures_border+
  ylim(-10,10)+
  coord_flip()

pout <- ggdraw() + 
  draw_plot(barPlot)
pout

FigS6d <- pout + theme(plot.margin = margin(t=1, l=1, r=1, unit = "cm"))

#summarize signature drugs in a network plot
set.seed(125)

FigS6f <- plotSigNet(meanTab) + theme(plot.margin = margin(t=1, r=1, l=2, unit = "cm"))
## `summarise()` has grouped output by 'name'. You can override using the
## `.groups` argument.

6.3.4.5 Specific drugs

#prepare data
drugs_show <- c("Ibrutinib", "PF-3758309", "AZD8055", "Venetoclax", "Duvelisib", "Cobimetinib", "Quizartinib") 
drug_targets <- c("Ibrutinib" = "BTK", "PF-3758309" = "PAK", "AZD8055" = "mTOR", "Venetoclax" = "BCL2", "Duvelisib" = "PI3K", "Cobimetinib" = "MEK", "Quizartinib" = "FLT3")

screen_long_selected_drugs <- screen_long |> 
  filter(drug %in% drugs_show) |> 
  group_by(drug, diagnosis, conc_index) |>
    mutate(mean_viab=mean(viability), 
    std.error=std.error(viability))

screen_long_selected_drugs$concentration <- as.character(screen_long_selected_drugs$concentration)

#show drug-response for BTK, PAK and mTOR inhibitor
#helper function for plots
viab_plot_list <- list()

viab_plot_list <- map(drugs_show, ~{
  screen_long_selected_drugs |> 
    filter(drug == .x, diagnosis %in% diagnoses_show) |> 
    ggplot(aes(x=concentration, y=mean_viab, group=diagnosis, color=diagnosis)) +
      geom_errorbar(aes(ymin=mean_viab-std.error, ymax=mean_viab+std.error), 
                    width=.5, position=position_dodge(0.01)) +
      geom_line() +
      theme_figures_angle45_x +
      labs(title=paste0(.x, " (", drug_targets[.x], ")"), 
           y="Viability", x=bquote("Concentration ("*mu*"M)"), 
           color="Diagnosis") +
      scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.1)) +
      scale_color_manual(values=colors_diag)
}) |> 
  set_names(drugs_show)

#combine plots
Fig3e <- viab_plot_list$Ibrutinib + viab_plot_list$`PF-3758309` + viab_plot_list$AZD8055 + 
  plot_layout(ncol=3, guides = 'collect') & 
  theme(legend.position='right', legend.title= element_text(size=8, face="bold"), 
        plot.margin = margin(t=0.5, b=0.5, l=0.5, r=0.5, unit = "cm")) &
  guides(
    color = guide_legend(ncol = 1),
    fill = guide_legend(ncol = 1),
    shape = guide_legend(ncol = 1),
    linetype = guide_legend(ncol = 1)
  )
Fig3e

ggsave(path=(paste0(filepath,"figures")), filename = "Fig3e_viab_btk_pak_mtor.svg")
## Saving 6 x 4 in image
#show drug-response for BCL2 inhibtor (venetoclax)
diagnoses_ven <- c("CLL", "AML", "B-ALL", "T-ALL", "MCL", "Sezary")

screen_long_ven <- screen_long_selected_drugs |> 
  filter(drug == "Venetoclax", diagnosis %in% diagnoses_ven) |> 
  group_by(diagnosis, concentration) |> 
  summarize(med_viab=median(viability))
## `summarise()` has grouped output by 'diagnosis'. You can override using the
## `.groups` argument.
#fit dose-response models
screen_long_ven$concentration <- as.numeric(screen_long_ven$concentration)

models_ven <- map(diagnoses_ven, function(dx) {
  data <- screen_long_ven |> filter(diagnosis == dx)
  drm(med_viab ~ concentration, 
      data = data, 
      fct = LL.4(names = c("hill", "min_value", "max_value", "ec_50")))
})
names(models_ven) <- tolower(diagnoses_ven)

#create predictions based on models
predictions_ven <- map(diagnoses_ven, function(dx) {
  data <- screen_long_ven |> filter(diagnosis == dx)
  model <- models_ven[[tolower(dx)]]
  
  new_data <- data.frame(
    concentration = seq(min(data$concentration), max(data$concentration), length.out = 500)
  )
  new_data$pred <- predict(model, newdata = new_data)
  new_data$diagnosis <- dx
  new_data
}) |> 
  bind_rows()

#calculate IC50 values for all diagnoses
ic50_ven <- do.call(rbind, lapply(diagnoses_ven, function(dx) {
  model <- models_ven[[tolower(dx)]]
  coefs <- setNames(model$coefficients, c("hill", "min_value", "max_value", "ec_50"))
  
  ic_50 <- with(as.list(coefs), 
                exp(log(ec_50) + (1 / hill) * log(max_value / (max_value - 2 * min_value))))
  
  data.frame(ic50 = ic_50, diagnosis = dx)
}))
## Warning in log(max_value/(max_value - 2 * min_value)): NaNs produced
#set colors
colors_ven <- c(setNames("darkgrey", "Sezary"), colors_diag)

#order of diagnoses
order_ven <- ic50_ven |> 
  arrange(ic50) |> 
  pull(diagnosis)

#plot
data_ven <- screen_long_selected_drugs |> 
  filter(drug == "Venetoclax", diagnosis %in% diagnoses_ven)
data_ven$diagnosis <- factor(data_ven$diagnosis, levels = order_ven)
data_ven$concentration <- as.numeric(data_ven$concentration)

predictions_ven$diagnosis <- factor(predictions_ven$diagnosis, levels = order_ven)
ic50_ven$diagnosis        <- factor(ic50_ven$diagnosis, levels = order_ven)

Fig3f <- data_ven |> 
  ggplot(aes(x=concentration, y=viability, group=diagnosis, color=diagnosis)) +
    geom_boxplot(aes(group=concentration), alpha = 0.8, width=0.5, outlier.shape = NA) +
    geom_jitter(aes(group=concentration), alpha=0.25, size=1, width = 0.1) +
    facet_wrap(~diagnosis, ncol=6) +
    geom_line(data = predictions_ven, aes(x = concentration, y = pred, color = diagnosis))+
    geom_vline(data = ic50_ven, aes(xintercept = ic50), 
               linetype = "dashed", color = "black", size = 0.4) +
    scale_x_log10(breaks = c(0.001, 0.01, 0.1, 1),
                  labels = trans_format("log10", math_format(10^.x)))+
    scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.1)) +
    theme_figures_facet+
    labs(title="Venetoclax (BCL2)", x=bquote("Concentration ("*mu*"M)"), y = "Viability") +
    scale_color_manual(values=colors_ven)+
    theme(plot.margin = margin(t=1, b=1, r=1, l=1, unit="cm"))
Fig3f
## Warning: Removed 22 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 23 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_vline()`).

ggsave(path=(paste0(filepath,"figures")), filename = "Fig3f_venetoclax.svg")
## Saving 10 x 4 in image
## Warning: Removed 22 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 22 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_vline()`).
#duvelisib
pi3k_all <- screen_long_selected_drugs |> 
  filter(diagnosis %in% c("B-ALL", "T-ALL"), drug == "Duvelisib") |> 
  ggplot(aes(x=factor(concentration), y=viability, group=interaction(diagnosis, concentration), color=diagnosis)) +
  geom_boxplot(alpha = 0.8, outlier.shape = NA)+
  geom_jitter(alpha = 0.2, size=1, position = position_jitterdodge())+
  labs(title="Duvelisib (PI3K)", y="Viability", x=bquote("Concentration ("*mu*"M)"), color = "Diagnosis")+
  scale_y_continuous(breaks=seq(0,1.2, 0.2), limits=c(0,1.2))+
  theme_figures_angle45_x+
  scale_color_manual(values=colors_diag)

Fig3g <- pi3k_all + theme(plot.margin = margin(t=1, b=1, r=1, l=1, unit="cm"))
Fig3g

ggsave(path=(paste0(filepath,"figures")), filename = "Fig3g_duvelisib.svg")
## Saving 6 x 4 in image
#cobimetinib
cobi <- screen_long_selected_drugs |> 
  filter(diagnosis %in% c("CLL", "HCL", "T-PLL"), drug == "Cobimetinib") |> 
  ggplot(aes(x=factor(concentration), y=viability, group=diagnosis, color=diagnosis)) +
  geom_boxplot(aes(group=concentration), alpha = 0.8, width = 0.5, outlier.shape = NA)+
  geom_jitter(size=1, alpha = 0.25, width=0.1)+
  facet_wrap(~diagnosis, ncol = 3)+
  labs(title="Cobimetinib (MEK)", x = bquote("Concentration ("*mu*"M)"), y = "Viability")+
  theme_figures_facet_angle45_x+
  scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.1))+
  scale_color_manual(values=colors_diag)

FigS6a <- cobi + theme(plot.margin = margin(t=1, b=1, r=1, l=1, unit="cm"))

#quizartinib
quiz <- screen_long_selected_drugs |> 
  filter(diagnosis %in% c("AML", "B-ALL", "CLL"), drug == "Quizartinib") |> 
  ggplot(aes(x=factor(concentration), y=viability, group=diagnosis, color=diagnosis)) +
  geom_boxplot(aes(group=concentration), alpha = 0.8, width = 0.5, outlier.shape = NA)+
  geom_jitter(size=1, alpha = 0.25, width=0.1)+
  facet_wrap(~diagnosis, ncol = 3)+
  labs(title="Quizartinib (FLT3)", x = bquote("Concentration ("*mu*"M)"), y = "Viability")+
  theme_figures_facet_angle45_x+
  scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.1))+
  scale_color_manual(values=colors_diag)

FigS6b <- quiz + theme(plot.margin = margin(t=1, b=1, r=1, l=1, unit="cm"))
## Warning: Removed 22 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 23 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_vline()`).
#FigS5 multipanel
row1 <- plot_grid(FigS6a, FigS6b, ncol = 2, rel_widths = c(0.5, 0.5), labels=c("A", "B"), label_size=14)
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 102 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 102 rows containing missing values or values outside the scale range
## (`geom_point()`).
colde <- plot_grid(FigS6d, FigS6e, ncol = 1, rel_heights = c(0.6, 0.4), align = "v", axis = c("l", "r"), labels=c("D", "E"), label_size=14)

row2 <- plot_grid(FigS6c, colde, ncol = 2, rel_widths = c(0.6, 0.4), labels=c("C", ""), label_size=14)

row3 <- plot_grid(FigS6f, ncol = 1, labels=c("F"), label_size=14)
## Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
## ggplot2 3.3.4.
## ℹ Please use "none" instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
FigS5 <- plot_grid(row1, row2, row3, NULL, rel_heights = c(0.15, 0.5, 0.25, 0.1), ncol = 1, 
                   align = "v", axis = c("l", "r")) & theme(plot.margin=margin(r=1,l=1,t=1,b=1,unit="cm"))

ggsave(path=(paste0(filepath,"figures")), filename = "FigS5.svg", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "FigS5.pdf", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "FigS5.png", width=42, height=59.4, units = "cm", dpi=600) #remove later

7 Drug-drug correlations

7.1 Drug-drug correlation heatmap

## `summarise()` has grouped output by 'patientID'. You can override using the
## `.groups` argument.
## Warning in text.default(pos.xlabel[, 1], pos.xlabel[, 2], newcolnames, srt =
## tl.srt, : "cl.col" is not a graphical parameter
## Warning in text.default(pos.ylabel[, 1], pos.ylabel[, 2], newrownames, col =
## tl.col, : "cl.col" is not a graphical parameter
## Warning in title(title, ...): "cl.col" is not a graphical parameter

## `summarise()` has grouped output by 'patientID'. You can override using the
## `.groups` argument.
## Warning in text.default(pos.xlabel[, 1], pos.xlabel[, 2], newcolnames, srt =
## tl.srt, : "cl.col" is not a graphical parameter
## Warning in text.default(pos.ylabel[, 1], pos.ylabel[, 2], newrownames, col =
## tl.col, : "cl.col" is not a graphical parameter
## Warning in title(title, ...): "cl.col" is not a graphical parameter

## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
## Warning in as_grob.default(plot): Cannot convert object of class NULL into a
## grob.

7.2 Scatter plots for drug-drug correlations (Fig. 4B)

#select drugs
drugs_show_corr <- c("SCH772984", "Cobimetinib", "Ibrutinib", "Idelalisib", "Cobimetinib", "Rapamycin")

#helper function for CLL
corr_drugs_cll <- screen_means |> 
  filter(diagnosis == c("CLL"), drug %in% drugs_show_corr) |> 
  pivot_wider(names_from = drug, values_from = mean_viab)

corr_drugs_aml <- screen_means |> 
  filter(diagnosis == c("AML"), drug %in% drugs_show_corr) |> 
  pivot_wider(names_from = drug, values_from = mean_viab)

corr_drugs <- rbind(corr_drugs_cll, corr_drugs_aml)

plot_corr <- function(data, x, y, xlab, ylab) {
  
  corr_labels <- data |> 
    group_by(diagnosis) |> 
    summarize(corr=cor(.data[[x]], .data[[y]], use = "complete.obs"),
              .groups = "drop")
  
  ggplot(data, aes_string(x = x, y = y)) +
    geom_point(aes(color = diagnosis), alpha = 0.5) +
    geom_smooth(method = "lm", se = TRUE,
                color = "black", fill = "lightgrey") + 
    labs(x = xlab, y = ylab, color="Diagnosis")+
    facet_wrap(~diagnosis)+
    theme_figures + 
    theme(strip.background = element_blank(),
          strip.text.x = element_blank())+
    geom_text(data = corr_labels, aes(x = 0.8, y = 1.1, 
                                      label = paste0("italic(R) == ", round(corr, 2))), size=3, parse = TRUE) +
    xlim(0.4,1.2) + ylim(0.4,1.2) +
    scale_color_manual(values = colors_diag)
}

#plot
p1 <- plot_corr(corr_drugs, "SCH772984", "Cobimetinib", xlab="SCH772984 (ERK)", ylab= "Cobimetinib (MEK)")
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p2 <- plot_corr(corr_drugs, "Ibrutinib", "Idelalisib", xlab="Ibrutinib (BTK)", ylab= "Idelalisib (PI3K)")
p3 <- plot_corr(corr_drugs, "Ibrutinib", "Cobimetinib", xlab="Ibrutinib (BTK)", ylab= "Cobimetinib (MEK)")
p4 <- plot_corr(corr_drugs, "Rapamycin", "Idelalisib", xlab="Rapamycin (mTOR)", ylab= "Idelalisib (PI3K)")

Fig4b <- p1+plot_spacer()+p2+plot_spacer()+p3+plot_spacer()+p4+plot_layout(ncol=7, widths = c(4, 0.5, 4, 0.5, 4, 0.5, 4), guides = 'collect') & theme(legend.position='right', plot.margin = margin(t=1, l=0.5, unit = "cm"))
Fig4b
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

8 Genotype-phenotype associations in CLL

8.1 P value scatterplot

#prepare data
#pTab <- ... (adapt from Junyans script)

pTab <- read.csv(paste0(filepath,"data/pTable_drug_VS_gene.csv"), sep=";") |> 
  mutate(across(c(p, p.adj, diff), ~as.numeric(gsub(",", ".", as.character(.)))))

#number of significant associations per gene (10% FDR)
plotTab1 <- filter(pTab, p.adj <= 0.1) %>% group_by(gene) %>%
  summarise(number = length(drug)) %>%
  arrange(desc(number)) %>% mutate(gene = factor(gene, levels = gene))

#p value scatter plot (IGHV not shown)
top_genes <- as.vector(unlist(plotTab1[2:8,"gene"]))
showGenes <- sort(top_genes)
append(showGenes, "other")
## [1] "DDX3X"     "del11q"    "del13q"    "del17p"    "gain19p"   "TP53"     
## [7] "trisomy12" "other"
#define colors for each gene
library(RColorBrewer)
colorList <- c(colorRampPalette(brewer.pal(7, 'Set2'))(7), 
               "grey80", "grey90")
  
names(colorList) = c(showGenes, "other", "Below 10% FDR")

colorList["del17p"] <- "#FC8D62" 
colorList["del11q"] <- "#E5C494"
colorList["trisomy12"] <- "#FFD92F"
colorList["DDX3X"] <- "#E78AC3"
colorList["TP53"] <- "#66C2A5"

#define shapes for viability
shapeList <- ifelse(names(colorList) %in% showGenes, 19,1)
names(shapeList) <- names(colorList)
shapeList["other"] <- 19

shapeList <- c(1, 25, 24)
names(shapeList) = c("Below 10% FDR", "Down", "Up")

#order drugs by their association similarity
pMat <- dplyr::select(pTab, drug, gene, p) %>% 
  filter(gene!="IGHV.status") %>%
  spread(key = gene, value = p) %>%
  column_to_rownames("drug") %>% 
  as.matrix()

hc <- hclust(dist(pMat), method = "ward.D2")
drugOrder <- rownames(pMat)[hc$order]

#order drugs by p value
pval_list <- pTab |> 
  filter(gene != "IGHV.status") |> 
  group_by(drug) |> 
  summarize(pval = min(p)) |> 
  arrange(pval, descending = FALSE)

drugOrder2 <- pval_list$drug

#prepare plot table
plotTab <- pTab %>% filter(gene != "IGHV.status") %>%
  mutate(gene = ifelse(gene %in% showGenes, gene, "other")) %>%
  mutate(gene_FDR = ifelse(p.adj < 0.1, gene, "Below 10% FDR"),
         drug = factor(drug, levels = drugOrder2),
         gene = factor(gene, levels = names(colorList)),
         direction = ifelse(gene_FDR == "Below 10% FDR", "Below 10% FDR", ifelse(sign(diff)>0, "Down", "Up"))) 

#plot
Fig5a <- ggplot(plotTab, aes(x=drug, y = -log10(p), color = gene, shape = direction)) +
  geom_point(size =2, alpha = 0.8, stroke = 1) + scale_color_manual(values = colorList) +
  scale_shape_manual(values = shapeList, name = "Viability effect") + theme_bw() +
  labs(x="", y = expression(-log[10]*italic(P)), color = "Genetic aberration")+
  theme_figures_border+
  theme(axis.text.x = element_text(angle=45, hjust=1, vjust=1),
        panel.grid.major.x = element_line(size=.1, color="grey50", 
                                          linetype = "dotted"),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        legend.position = c(.95, .95),
        legend.justification = c("right", "top"),
        legend.box.just = "right",
        legend.box = "horizontal", 
        legend.margin = margin(6, 6, 6, 6))+
  geom_hline(yintercept=-log10(0.004706046), linetype = "dashed", linewidth=0.3, color="black")+
  annotate("text", label="FDR 10%", size=3, x=60, y=3)+
  guides(shape = guide_legend(order = 1),
         color = guide_legend(order = 2))

Fig5a <- Fig5a + theme(plot.margin = margin(t=1, l=1, r=1, unit = "cm"))
Fig5a

ggsave(path=(paste0(filepath,"figures")), filename = "Fig5a_pvalue_scatterplot.svg")
## Saving 8 x 6 in image

8.2 Volcano plots (for IGHV, trisomy12 and TP53)

#IGHV
#perform t-tests for each drug and concentration level
p_values <- screen_long |> 
  filter(diagnosis == "CLL") |> 
  merge(metadata, by.x="patientID", by.y = "Patient.ID") |> 
  filter(!(is.na(IGHV.status))) |> 
  group_by(drug, conc_index) |> 
  summarize(tTest(viability, IGHV.status), .groups = "drop") |> 
  rename("meanU" = "mean1", "meanM" = "mean2") |> 
  ungroup() |> 
  mutate(p.adj = p.adjust(p, method = "BH"), 
         neg_log10_p = -log10(as.vector(p)))

#calculate overall mean viability difference in percent per drug between groups for plot
diffviab_overall <- screen_means |> 
  filter(diagnosis == "CLL") |> 
  merge(metadata, by.x="patientID", by.y = "Patient.ID") |> 
  filter(!(is.na(IGHV.status))) |> 
  group_by(drug, IGHV.status) |> 
  summarize(overall_mean = mean(mean_viab), .groups = "drop") |>  
  pivot_wider(names_from = "IGHV.status", values_from = "overall_mean") |> 
  mutate(overall_diff_viab = (U-M)/(U)*100)

#add p values (mean) and number of signifiantly different concentration levels per drug
diffviab_p_values_overall <- p_values |> 
  mutate(count = case_when(p.adj < 0.1 ~ 1, TRUE ~ 0)) |> 
  group_by(drug) |> 
  summarize(logpval = mean(neg_log10_p), sign_counts = sum(count)) |> 
  left_join(diffviab_overall, by="drug") |> 
  merge(drugs[,c("Drug", "Pathway_mod2")], by.x="drug", by.y="Drug") 

#define order
order_pway2 <- sort(unique(diffviab_p_values_overall$Pathway_mod2))
order_pway2 <- c(order_pway2[order_pway2 != "other"], "other")
diffviab_p_values_overall$Pathway_mod2 <- factor(diffviab_p_values_overall$Pathway_mod2, levels = order_pway2)

#plot
set.seed(123)
volcano_ighv <- ggplot(diffviab_p_values_overall, aes(x = overall_diff_viab, y = logpval, label=drug)) +
  geom_hline(yintercept = -log10(0.05), 
             linetype = "dashed", 
             color = "black") +
  geom_vline(xintercept = 0, 
             color = "black") +
  geom_point(aes(color=Pathway_mod2, size=sign_counts), alpha = 0.8) +
  geom_text_repel(
    text.padding = 0.5,
    box.padding = 0.5,
    max.overlaps = 10,
    min.segment.length = 2,
    size=3)+
  theme_figures+
  labs(
    title = "IGHV",
    x = "Difference in mean viability (%)",
    y = expression(-log[10]*italic(P)),
    color="Pathway", size="Number of sign. \ndifferent concentrations") +
  scale_x_continuous(limits = c(-20, 20)) +
  scale_color_manual(values=colors_pathways_mod2) +
  guides(color = guide_legend(override.aes = list(size = 4)))+
  annotate("text", label="more active\nin U-CLL", y=20, x=-10, size=3)+
  annotate("text", label="more active\nin M-CLL", y=20, x=10, size=3)
## Warning in geom_text_repel(text.padding = 0.5, box.padding = 0.5, max.overlaps
## = 10, : Ignoring unknown parameters: `text.padding`
Fig5b_top <- volcano_ighv + theme(plot.margin = margin(t=1, l=1, unit = "cm"))
Fig5b_top
## Warning: ggrepel: 50 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

#trisomy12
#perform t-tests for each drug and concentration level
p_values_tri12 <- screen_long |> 
  filter(diagnosis == "CLL") |> 
  merge(metadata, by.x="patientID", by.y = "Patient.ID") |> 
  filter(!(is.na(trisomy12))) |> 
  group_by(drug, conc_index) |> 
  summarize(tTest(viability, trisomy12), .groups = "drop") |> 
  rename("mean_mut" = "mean1", "mean_unmut" = "mean2") |> 
  ungroup() |> 
  mutate(p.adj = p.adjust(p, method = "BH"), 
         neg_log10_p = -log10(as.vector(p)))

#calculate overall mean viability difference in percent per drug between groups for plot
diffviab_overall_tri12 <- screen_means |> 
  filter(diagnosis == "CLL") |> 
  merge(metadata, by.x="patientID", by.y = "Patient.ID") |> 
  filter(!(is.na(trisomy12))) |> 
  group_by(drug, trisomy12) |> 
  summarize(overall_mean = mean(mean_viab), .groups = "drop") |>  
  pivot_wider(names_from = "trisomy12", values_from = "overall_mean") |> 
  rename("mut" = "1", "unmut" = "0") |> 
  mutate(overall_diff_viab = (mut-unmut)/(mut)*100)

#add p values (mean) and number of signifiantly different concentration levels per drug
diffviab_p_values_overall_tri12 <- p_values_tri12 |> 
  mutate(count = case_when(p.adj < 0.1 ~ 1, TRUE ~ 0)) |> 
  group_by(drug) |> 
  summarize(logpval = mean(neg_log10_p), sign_counts = sum(count)) |> 
  left_join(diffviab_overall_tri12, by="drug") |> 
  merge(drugs[,c("Drug", "Pathway_mod2")], by.x="drug", by.y="Drug") 

#plot
diffviab_p_values_overall_tri12$Pathway_mod2 <- factor(diffviab_p_values_overall_tri12$Pathway_mod2, levels = order_pway2)

volcano_tri12 <- ggplot(diffviab_p_values_overall_tri12, aes(x = overall_diff_viab, y = logpval, label=drug)) +
  geom_hline(yintercept = -log10(0.05), 
             linetype = "dashed", 
             color = "black") +
  geom_vline(xintercept = 0, 
             color = "black") +
  geom_point(aes(color=Pathway_mod2, size=sign_counts), alpha = 0.8) +
  geom_text_repel(
    text.padding = 0.5,
    box.padding = 0.5,
    max.overlaps = 10,
    min.segment.length = 2,
    size=3)+
  theme_figures+
  labs(
    title = "trisomy12",
    x = "Difference in mean viability (%)",
    y = expression(-log[10]*italic(P)),
    color="Pathway", size="Number of sign. \ndifferent concentrations") +
  scale_x_continuous(limits = c(-20, 20)) +
  scale_color_manual(values=colors_pathways_mod2) +
  guides(color = guide_legend(override.aes = list(size = 4)))+
  annotate("text", label="more active\nin trisomy12", y=15, x=-10, size=3)+
  annotate("text", label="more active\nin wt", y=15, x=10, size=3)
## Warning in geom_text_repel(text.padding = 0.5, box.padding = 0.5, max.overlaps
## = 10, : Ignoring unknown parameters: `text.padding`
Fig5b_bottom <- volcano_tri12 + theme(plot.margin = margin(t=1, l=1, unit = "cm"))
Fig5b_bottom
## Warning: ggrepel: 55 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

#TP53
#perform t-tests for each drug and concentration level
p_values_tp53 <- screen_long |> 
  filter(diagnosis == "CLL") |> 
  merge(metadata, by.x="patientID", by.y = "Patient.ID") |> 
  filter(!(is.na(TP53))) |> 
  group_by(drug, conc_index) |> 
  summarize(tTest(viability, TP53), .groups = "drop") |> 
  rename("mean_mut" = "mean1", "mean_unmut" = "mean2") |> 
  ungroup() |> 
  mutate(p.adj = p.adjust(p, method = "BH"), 
         neg_log10_p = -log10(as.vector(p)))

#calculate overall mean viability difference in percent per drug between groups for plot
diffviab_overall_tp53 <- screen_means |> 
  filter(diagnosis == "CLL") |> 
  merge(metadata, by.x="patientID", by.y = "Patient.ID") |> 
  filter(!(is.na(TP53))) |> 
  group_by(drug, TP53) |> 
  summarize(overall_mean = mean(mean_viab), .groups = "drop") |>  
  pivot_wider(names_from = "TP53", values_from = "overall_mean") |> 
  rename("mut" = "1", "unmut" = "0") |> 
  mutate(overall_diff_viab = (mut-unmut)/(mut)*100)

#add p values (mean) and number of signifiantly different concentration levels per drug
diffviab_p_values_overall_tp53 <- p_values_tp53 |> 
  mutate(count = case_when(p.adj < 0.1 ~ 1, TRUE ~ 0)) |> 
  group_by(drug) |> 
  summarize(logpval = mean(neg_log10_p), sign_counts = sum(count)) |> 
  left_join(diffviab_overall_tp53, by="drug") |> 
  merge(drugs[,c("Drug", "Pathway_mod2")], by.x="drug", by.y="Drug") 

#plot
diffviab_p_values_overall_tp53$Pathway_mod2 <- factor(diffviab_p_values_overall_tp53$Pathway_mod2, levels = order_pway2)

volcano_tp53 <- ggplot(diffviab_p_values_overall_tp53, aes(x = overall_diff_viab, y = logpval, label=drug)) +
  geom_hline(yintercept = -log10(0.05), 
             linetype = "dashed", 
             color = "black") +
  geom_vline(xintercept = 0, 
             color = "black") +
  geom_point(aes(color=Pathway_mod2, size=sign_counts), alpha = 0.8) +
  geom_text_repel(
    text.padding = 0.5,
    box.padding = 0.5,
    max.overlaps = 10,
    min.segment.length = 2,
    size=3)+
  theme_figures+
  labs(
    title = "TP53",
    x = "Difference in mean viability (%)",
    y = expression(-log[10]*italic(P)),
    color="Pathway", size="Number of sign. \ndifferent concentrations") +
  scale_x_continuous(limits = c(-15, 15)) +
  scale_color_manual(values=colors_pathways_mod2) +
  guides(color = guide_legend(override.aes = list(size = 4)))+
  annotate("text", label="more active\nin mutated", y=8, x=-7.5, size=3)+
  annotate("text", label="more active\nin wt", y=8, x=7.5, size=3)
## Warning in geom_text_repel(text.padding = 0.5, box.padding = 0.5, max.overlaps
## = 10, : Ignoring unknown parameters: `text.padding`
FigS7f <- volcano_tp53 + theme(plot.margin = margin(t=1, l=1, unit = "cm"))
FigS7f
## Warning: ggrepel: 57 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

8.3 Dose-response relationship in DDX3X-mutated U-CLL

#select B-cell receptor inhibitors, show response for U-CLL
drugs_bcr <- c("Ibrutinib", "ONO-4059", "PRT062607")

df_bcr <- screen_long |>
  filter(diagnosis == "CLL", drug %in% drugs_bcr) |> 
  merge(metadata[,c("Patient.ID", "IGHV.status", "DDX3X")], by.x = "patientID", by.y = "Patient.ID") |> 
  filter(IGHV.status == "U", !is.na(DDX3X))
  
#plot
bcr_ddx3x <- df_bcr |> 
  mutate(drug = case_when(drug == "Ibrutinib" ~ "Ibrutinib (BTK)",
    drug == "ONO-4059" ~ "ONO-4059 (BTK)",
    drug == "PRT062607" ~ "PRT062607 (SYK)",
    TRUE ~ drug)) |> 
  ggplot(aes(x=factor(concentration), y=viability, color=DDX3X)) + 
  geom_boxplot(alpha = 0.8, width = 0.5, position=position_dodge(0.75), outlier.shape = NA) + 
  geom_jitter(alpha = 0.25, position = position_jitterdodge(0.1)) + 
  facet_wrap(.~drug) + 
  theme_figures_facet_angle45_x_legend+
  stat_compare_means(aes(group = DDX3X), 
                    method = "t.test",
                    label = "p.signif",
                    label.y = 1.1, 
                    size=3)+
  labs(y = "Viability", x = bquote("Concentration ("*mu*"M)"), title ="U-CLL")+
  scale_color_manual(values=colors_ddx3x, labels=c("unmutated", "mutated")) +
  scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.15)) +
  guides(color = guide_legend(override.aes = list(label = "")))

Fig5c <- bcr_ddx3x + theme(plot.margin = margin(t=0.5, l=1, r=1, b=1, unit = "cm"))
Fig5c

8.3.1 BTK vs PI3K in DDX3X mutated CLL

#select ibrutinib and idelalisb, show response for U-CLL
drugs_ddx3x <- c("Ibrutinib", "Idelalisib")

btk_pi3k <- screen_long |>
  filter(diagnosis == "CLL", drug %in% drugs_ddx3x) |> 
  merge(metadata[,c("Patient.ID", "IGHV.status", "DDX3X")], by.x = "patientID", by.y = "Patient.ID") |> 
  filter(IGHV.status == "U", !is.na(DDX3X)) |> 
  mutate(drug = case_when(drug == "Ibrutinib" ~ "Ibrutinib (BTK)", drug == "Idelalisib" ~ "Idelalisib (PI3K)")) |> 
  pivot_wider(names_from = "drug", values_from = "viability")

#plot
btk_pi3k_ddx3x <- btk_pi3k |> 
    ggplot(aes(x=`Idelalisib (PI3K)`, y=`Ibrutinib (BTK)`, color=DDX3X)) + 
    geom_point(alpha = 0.75) + 
    geom_abline(slope=1, linetype="dashed")+
    scale_color_manual(values=colors_ddx3x, labels=c("unmutated", "mutated"))+
    theme_figures_facet+
    scale_y_continuous(breaks=seq(0.4,1.0, 0.2), limits=c(0.3,1.1)) +
    scale_x_continuous(breaks=seq(0.4,1.0, 0.2), limits=c(0.3,1.1)) +
    guides(color = guide_legend(override.aes = list(label = ""))) +
    facet_wrap(~concentration, ncol=5)+
    labs(title=expression(bold(paste("Concentration (", mu, "M)"))), x="Idelalisib (PI3K)", y= "Ibrutinib (BTK)")

SFig11b <- btk_pi3k_ddx3x + theme(plot.margin = margin(l = 1, r = 1, t=1, b=1.5, unit = "cm"))
SFig11b

8.4 Lasso model

## `summarise()` has grouped output by 'patientID'. You can override using the
## `.groups` argument.

8.5 del11q

#select B-cell receptor inhibitors
del11q <- screen_means |>
  filter(diagnosis == "CLL", drug %in% drugs_bcr) |> 
  merge(metadata[,c("Patient.ID", "IGHV.status", "del11q")], by.x = "patientID", by.y = "Patient.ID") |> 
  filter(!is.na(IGHV.status), !is.na(del11q)) |> 
  group_by(patientID, drug) |> 
  mutate(drug = case_when(drug == "Ibrutinib" ~ "Ibrutinib (BTK)", drug == "ONO-4059" ~ "ONO-4059 (BTK)", drug == "PRT062607" ~ "PRT062607 (SYK)"))

del11q_ighv <- del11q |> 
  filter(drug == "Ibrutinib (BTK)") |> 
  ggplot(aes(x=del11q, y=mean_viab, color=del11q)) +
  geom_boxplot(alpha = 1, width = 0.5, position=position_dodge(0.75), outlier.shape = NA) + 
  geom_jitter(aes(shape=IGHV.status), width = 0.1, alpha = 0.75) + 
  scale_color_manual(values=colors_del11q, labels=c("unmutated", "mutated")) +
  scale_shape_discrete(name="IGHV", labels=c("unmutated", "mutated")) +
  facet_wrap(~drug)+
  theme_figures_facet+
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "right",
        legend.title = element_text(size=8, face="bold"), 
        legend.text=element_text(size=8), 
        legend.key.size = unit(0.5, 'cm'))+
  labs(y = "Mean viability", x="", title="CLL") + 
  stat_compare_means(aes(group = del11q), method = "t.test", label = "p.signif", label.x.npc= "middle", label.y = 1.05, size=3)+
  scale_y_continuous(breaks=seq(0.4,1.0, 0.2), limits=c(0.4,1.1)) +
  guides(color = guide_legend(override.aes = list(label = "")))

SFig10a <- del11q_ighv + theme(plot.margin = margin(l = 1, r = 1, t=1, unit = "cm"))
SFig10a

#show effect of B-cell receptor inhibitors in U-CLL only
del11q_ucll <- del11q |> 
  filter(IGHV.status == "U") |> 
  ggplot(aes(x=del11q, y=mean_viab, color=del11q)) +
  geom_boxplot(alpha = 1, width = 0.5, position=position_dodge(0.75), outlier.shape = NA) + 
  geom_jitter(aes(shape=IGHV.status), width = 0.1, alpha = 0.75) + 
  scale_color_manual(values=colors_del11q, labels=c("unmutated", "mutated")) +
  facet_wrap(~drug)+
  theme_figures_facet+
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "right",
        legend.title = element_text(size=8, face="bold"), 
        legend.text=element_text(size=8), 
        legend.key.size = unit(0.5, 'cm'))+
  labs(y = "Mean viability", x="", title="U-CLL") + 
  stat_compare_means(aes(group = del11q), method = "t.test", label = "p.signif", label.x.npc= "middle", label.y = 1.05, size=3)+
  scale_y_continuous(breaks=seq(0.4,1.0, 0.2), limits=c(0.4,1.1)) +
  guides(color = guide_legend(override.aes = list(label = "")), shape = "none")

SFig10b <- del11q_ucll + theme(plot.margin = margin(l = 1, r = 1, t=1, unit = "cm"))
SFig10b

8.6 trisomy(12) in M-CLL and U-CLL

8.7 Fludarabine and nutlin-3a in TP53-mutated CLL

9 DDX3X

9.1 Overview of mutations

#DDX3X mutations in CPS1000
ddx3x_table
## # A tibble: 8 × 8
##   ID    Sex   IGHV  `DNA mutation` `Amino acid alteration`  Exon
##   <chr> <chr> <chr> <chr>          <chr>                   <dbl>
## 1 P0293 M     U     c.845dupG      p.C282fs                    9
## 2 P0538 M     U     c.1115_1122del p.E372fs                   10
## 3 P0696 F     U     c.A991G        p.M331V                    10
## 4 P0711 M     U     c.C1229G       p.S410C                    12
## 5 P0880 F     U     c.C1184T       p.T395I                    11
## 6 P0987 M     U     c.C1120T       p.Q374X                    10
## 7 P1027 M     U     c.T1256G       p.L419R                    11
## 8 P1075 M     U     c.C625T        p.Q209X                     6
## # ℹ 2 more variables: `Type of alteration` <chr>, `Allele frequency` <dbl>
ddx3x_table$`Allele frequency` <- round(ddx3x_table$`Allele frequency`,2)

#create table
ft <- flextable(ddx3x_table) |> 
  theme_alafoli()
  
ft <-  ft |> 
  flextable::fontsize(size = 8, part = "header") |> 
  flextable::fontsize(size = 8, part = "body") |> 
  flextable::padding(padding.top = 3, padding.bottom = 3, part = "all") |> 
  flextable::align(align = "center", part = "all") |> 
  flextable::bold(part = "header") |>
  flextable::width(j=c(1,2,3,6,8), width = 0.1) |> 
  flextable::width(j=c(4,7), width = 0.25) |>
  flextable::width(j=c(5), width = 0.5) |>
  flextable::line_spacing(space = 1.15, part = "all") |>
  flextable::color(color = "black", part = "all") |> 
  #add header with Greek letter mu (μ)
  flextable::vline(j = "ID")
  
SFig11a <- gen_grob(ft, fit = "width", scaling = "min", hjust = 0)

9.1.1 Lollipop plot

#prepare data
mutations <- ddx3x_cps1000[,c("position","mutation", "type_adjusted")]
mutations$frequency <- c(rep(1,times=8))

ddx3x_literature <- ddx3x_literature |>  
  group_by(position) |>  
  mutate(count = n())

mutations_lit <- ddx3x_literature[,c("position","mutation", "type_adjusted")]
mutations_lit$frequency <- ddx3x_literature$count*(-1)

mutations <- rbind(mutations, mutations_lit)

mutations <- mutations |> 
  mutate(type_adjusted = case_when(
    type_adjusted == "multiple" ~ "Multiple",
    TRUE ~ type_adjusted
  ))

#combine mutations into one label
mutations_combined <- mutations %>%
  group_by(position, frequency) %>%
  summarise(
    mutations = paste(unique(mutation), collapse = ", "),
    .groups = "drop"
  )

#protein domain data (provide source)
domains <- data.frame(
  start = c(182, 414),
  end = c(404, 544),
  domain = c("Helicase domain 1", "Helicase domain 2")
)

RNA_domains <- data.frame(
  start = c(274, 302, 324, 445, 471, 495),
  end = c(280, 303, 342, 450, 480, 504),
  domain = c("RNA binding")
)

ATP_domains <- data.frame(
  start = c(343, 525),
  end = c(352, 536),
  domain = c("ATP binding")
)

color_data <- rbind(domains,ATP_domains,RNA_domains)

#protein length
protein_length <- 662

#set colors
mycolors_type <- colorRampPalette(brewer.pal(6, "Set2"))(6)
names(mycolors_type) <- unique(mutations$type_adjusted)

mycolors_domain <- colorRampPalette(brewer.pal(4, "Set3"))(4)
mycolors_domain<- alpha(mycolors_domain, 1)
names(mycolors_domain) <- c("Helicase domain 1", "Helicase domain 2", "ATP binding", "RNA binding")

#plot
lollipop <- ggplot() +
  #add mutation lollipops
  geom_segment(data = mutations,
               aes(x = position, xend = position,
                   y = 0, yend = frequency),
               color = "grey70", alpha = 0.5, size = 0.25) +
  
  geom_point(data = mutations,
             aes(x = position, y = frequency, color = type_adjusted), 
             size = 2) +
  
  #base rectangle representing the gene
  statebins:::geom_rrect(aes(xmin = 0, xmax = protein_length,
                ymin = -0.25, ymax = 0.25),
            fill = "white", color = "black", size=0.4) +
  
  #add protein domains as colored rectangles
  geom_rect(data = domains,
            aes(xmin = start, xmax = end,
                ymin = -0.25, ymax = 0.25, fill = domain),
            color = "white", size=0.25) +
 
  #add RNA binding domains
  geom_rect(data = RNA_domains,
            aes(xmin = start, xmax = end,
                ymin = -0.25, ymax = 0.25,
                fill = domain),
            color = "white", size=0.25) +  
            
  #add ATP binding domains
    geom_rect(data = ATP_domains,
            aes(xmin = start, xmax = end,
                ymin = -0.25, ymax = 0.25,
                fill = domain),
            color = "white", size=0.25) +
            
  #add base rectangle with transparent fill
  statebins:::geom_rrect(aes(xmin = 0, xmax = protein_length,
                ymin = -0.25, ymax = 0.25),
            fill = "transparent", color = "black", size=0.4) +
            
  #add mutation labels
  geom_text_repel(data = mutations_combined,
                aes(x = position, y = frequency,
                    label = mutations),
                size = 3,
                color = "black",
                angle = 90,
                force = 0.1,         
                force_pull = 0.1,    
                point.padding = 0.5,
                box.padding = 0.3,
                min.segment.length = 0,
                segment.color = "grey80",
                segment.alpha = 0.5,
                segment.size = 0.25,
                nudge_y = ifelse(mutations_combined$frequency < 1, -1, 0.5),
                max.overlaps = Inf,
                max.iter = 3000)+
  
  #add domain labels
  geom_text(data = color_data,
            aes(x = (start + end)/2, y = 0,
                label = "")) +
  
  #labels
  labs(
    x = "Amino acid position", y = "Number of mutations", color = "Type of mutation", fill= "Domain", title = "DDX3X"
  ) +
  
  #customize theme
  theme_minimal() +
  theme(panel.grid = element_blank(),
        axis.text = element_text(size = 8, color = "black"),
        axis.title.y = element_text(size = 8, hjust = 0.45), axis.title.x = element_text(size = 8),
        axis.ticks.x = element_line(color = "black", size = 0.5),  # Add x-axis ticks
        axis.ticks.y = element_line(color = "black", size = 0.5),  # Add y-axis ticks
        axis.ticks.length = unit(5, "pt"),  # Set the length of tick marks
        legend.position = "right", 
        legend.title = element_text(size = 8, face="bold"), 
        legend.key.size = unit(0.5, 'cm'),
        plot.title = element_text(hjust=0.5, size=8, face="bold")
        ) +

  #scale adjustments
  scale_y_continuous(
    expand = c(0, 0), limits = c(-9, 3), breaks = (seq(-6, -1, 1)), labels = c(6,5,4,3,2,1))+
  scale_x_continuous(limits = (c(-20,680)), breaks = c(1,100,200,300,400,500,600,662), expand = (c(0,0))) +
  geom_segment(aes(x = 1, y = -9, xend = 662, yend = -9), color = "black", size = 0.75)+
  geom_segment(aes(x = -20, y = -6.015, xend = -20, yend = -0.985), color = "black", size = 0.75)+
  scale_color_manual(values=mycolors_type)+
  scale_fill_manual(values=mycolors_domain)+
  guides(fill = guide_legend(override.aes = list(colour = NA)))
  
Fig5e <- lollipop + theme(plot.margin = margin(t=1, l=1, r=1, unit = "cm"))
Fig5e

9.2 RNA-seq

## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 740 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## 
## out of 15127 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 13, 0.086%
## LFC < 0 (down)     : 4, 0.026%
## outliers [1]       : 73, 0.48%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## [1] 17
## [1] "Intercept"                          "condition_cps_unmutated_vs_mutated"
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## log2 fold change (MAP): condition cps unmutated vs mutated 
## Wald test p-value: condition cps unmutated vs mutated 
## DataFrame with 15127 rows and 5 columns
##                   baseMean log2FoldChange     lfcSE     pvalue      padj
##                  <numeric>      <numeric> <numeric>  <numeric> <numeric>
## ENSG00000000003     1.9101     0.00289367 0.0846446 0.63193053  0.903934
## ENSG00000000419  2258.7670     0.36187463 0.3585549 0.00449449  0.393372
## ENSG00000000457  1022.1373    -0.02142053 0.0837562 0.39599811  0.805297
## ENSG00000000460  1218.4487    -0.00499457 0.0824318 0.79740023  0.957569
## ENSG00000000938 29787.4417    -0.01781967 0.0846137 0.39643908  0.805324
## ...                    ...            ...       ...        ...       ...
## ENSG00000273045   62.32308   -0.000905656 0.0817699   0.962356  0.994221
## ENSG00000273079   19.23316    0.004005899 0.0849180   0.110130  0.603490
## ENSG00000273155    2.72231    0.002674422 0.0842020   0.799143  0.957902
## ENSG00000273173   98.67154   -0.009399929 0.0833434   0.594818  0.892583
## ENSG00000273294    8.97583    0.008131371 0.0835206   0.641268  0.908161
##                             pathway     pval     padj log2err    ES   NES  size
##                              <char>    <num>    <num>   <num> <num> <num> <int>
## 1: HALLMARK_TNFA_SIGNALING_VIA_NFKB 5.41e-14 2.70e-12   0.965 0.523  2.25   188
## 2:          HALLMARK_MYC_TARGETS_V1 2.48e-13 6.20e-12   0.933 0.501  2.18   199
## 3:          HALLMARK_G2M_CHECKPOINT 6.21e-08 1.04e-06   0.705 0.438  1.91   196
## 4:   HALLMARK_INFLAMMATORY_RESPONSE 1.05e-06 1.31e-05   0.644 0.430  1.84   171
## 5:             HALLMARK_E2F_TARGETS 1.56e-06 1.56e-05   0.644 0.413  1.80   199
## 6:       HALLMARK_KRAS_SIGNALING_UP 2.57e-06 2.15e-05   0.627 0.432  1.84   156
##     leadingEdge
##          <list>
## 1: BTG3, G0....
## 2: MCM2, SN....
## 3: TOP2A, M....
## 4: NLRP3, C....
## 5: ATAD2, C....
## 6: G0S2, MA....
## New names:
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...22`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(ucll_knis)
## 
##   # Now:
##   data %>% select(all_of(ucll_knis))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 2321 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## 
## out of 16735 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 45, 0.27%
## LFC < 0 (down)     : 13, 0.078%
## outliers [1]       : 0, 0%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## [1] 58
## [1] "Intercept"                           "condition_knis_unmutated_vs_mutated"
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## Warning in nbinomGLM(x = x, Y = YNZ, size = size, weights = weightsNZ, offset =
## offsetNZ, : the line search routine failed, unable to sufficiently decrease the
## function value
## Warning in nbinomGLM(x = x, Y = YNZ, size = size, weights = weightsNZ, offset =
## offsetNZ, : the line search routine failed, unable to sufficiently decrease the
## function value
## Warning in nbinomGLM(x = x, Y = YNZ, size = size, weights = weightsNZ, offset =
## offsetNZ, : the line search routine failed, unable to sufficiently decrease the
## function value
## Warning in nbinomGLM(x = x, Y = YNZ, size = size, weights = weightsNZ, offset =
## offsetNZ, : the line search routine failed, unable to sufficiently decrease the
## function value
## log2 fold change (MAP): condition knis unmutated vs mutated 
## Wald test p-value: condition knis unmutated vs mutated 
## DataFrame with 16735 rows and 5 columns
##                   baseMean log2FoldChange      lfcSE    pvalue      padj
##                  <numeric>      <numeric>  <numeric> <numeric> <numeric>
## ENSG00000186092    1.67207    8.91947e-06 0.00144270  0.595549  0.998109
## ENSG00000187634  267.57342    1.97106e-06 0.00144264  0.849780  0.998109
## ENSG00000188976 3313.36386    7.83263e-06 0.00144255  0.380360  0.998109
## ENSG00000187961  311.10937    2.16735e-06 0.00144268  0.482642  0.998109
## ENSG00000187583   27.53665    7.18020e-07 0.00144268  0.803700  0.998109
## ...                    ...            ...        ...       ...       ...
## ENSG00000212907    90027.8    5.35529e-06 0.00144268  0.191985  0.969190
## ENSG00000198886   506468.1    5.87490e-06 0.00144266  0.284578  0.998109
## ENSG00000198786   285982.6    3.94295e-06 0.00144267  0.418751  0.998109
## ENSG00000198695    77486.4    2.17944e-06 0.00144266  0.545266  0.998109
## ENSG00000198727   341661.0    3.13512e-06 0.00144266  0.525181  0.998109
##                                       pathway     pval     padj log2err    ES
##                                        <char>    <num>    <num>   <num> <num>
## 1:               HALLMARK_ALLOGRAFT_REJECTION 1.07e-10 5.35e-09   0.839 0.486
## 2:             HALLMARK_INFLAMMATORY_RESPONSE 2.28e-10 5.70e-09   0.827 0.474
## 3:           HALLMARK_TNFA_SIGNALING_VIA_NFKB 2.12e-07 3.53e-06   0.690 0.425
## 4: HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION 6.27e-07 7.84e-06   0.659 0.414
## 5:                 HALLMARK_KRAS_SIGNALING_UP 1.74e-06 1.66e-05   0.644 0.420
## 6:                       HALLMARK_COAGULATION 1.99e-06 1.66e-05   0.627 0.458
##      NES  size  leadingEdge
##    <num> <int>       <list>
## 1:  2.20   177 CD96, NL....
## 2:  2.16   191 CD14, NL....
## 3:  1.94   191 CCL20, S....
## 4:  1.88   190 THBS1, S....
## 5:  1.90   178 NAP1L2, ....
## 6:  1.98   126 THBS1, S....

9.2.1 Hallmark gene sets

#prepare data for plot

#mark top common gene sets
top10_common <- intersect(fgsea_hallmark$pathway[1:10], 
                   fgsea_hallmark_knis$pathway[1:10])

#plot CPS data
hallmark_plot <- fgsea_hallmark |> 
  arrange(pval) |> 
  slice_head(n=10) |> 
  mutate(direction = as.character(ifelse(sign(NES) <0, "Up", "Down")),
         is_top10 = pathway %in% top10_common,
         pathway = as.character(pathway),
         pathway_formatted = as.character(ifelse(is_top10, paste0("**", pathway, "**"), pathway)))

hallmark_plot$pathway_formatted <- factor(hallmark_plot$pathway_formatted, 
                                          levels = sort_by(unique(hallmark_plot$pathway_formatted), 
                                                                   rev(hallmark_plot$pval)))


hallmark <- hallmark_plot |> 
  ggplot(aes(x=-log10(pval), y=pathway_formatted, fill=direction)) +
  geom_col(alpha=0.75) +
  labs(
    y = "",
    x = expression(-log[10]*italic(P)),
    fill = "Direction",
    title = "Hallmark"
  ) +
  theme_figures_border+
  scale_y_discrete(expand = c(0.075,0.01))+
  scale_x_continuous(expand = c(0.025,0))+
  scale_fill_manual(values="lightcoral")

Fig5f <- hallmark +
  theme(axis.text.y = element_markdown(color="black", size=8),
        plot.margin = margin(t=2, l=1, b=1, r=1, unit = "cm"))
Fig5f

9.2.2 Volcano plot

#prepare data for plot

#combine DEres
DEres_comb <- cbind(rbind(DEres_cps, DEres_knis), 
                            source=c(rep("CPS1000", nrow(DEres_cps)), rep("Knisbacher",nrow(DEres_knis))))

#define significant genes
DEres_comb$sign <- ifelse((DEres_comb$padj < 0.1 & 
                                 abs(DEres_comb$log2FoldChange) > 2), TRUE, FALSE)
DEres_comb$source_sign <- ifelse(DEres_comb$sign == TRUE, DEres_comb$source, "non_sign")

#prepare annotation
colors_volcano <- setNames(c("lightgrey", "red", "lightblue"), unique(DEres_comb$source_sig))

#annotate significant genes in top hallmark genesets and map to genes
sets_hallmark <- msigdbr(species = "Homo sapiens", collection = "H")

gene_to_hallmark <- sets_hallmark |> 
  group_by(gene_symbol) |> 
  summarize(hallmark_pathways = paste(gs_name, collapse = "; ")) |> 
  ungroup()

DEres_comb <- DEres_comb |> 
  left_join(gene_to_hallmark, by = c("external_gene_name" = "gene_symbol"))

genes_infl <- c("G0S2", "NLRP3", "CCL20", "CD14", "CD36", "CX3CR1", "IL10", "PDGFB", "SERPINE1", "EDA", "RGS13")

#plot
set.seed(123)
volcano <- DEres_comb |> 
  ggplot(aes(x = log2FoldChange, y = -log10(pvalue))) +
  geom_point(aes(color=source_sign), size=1, alpha = 0.7) +
  geom_label_repel(
    aes(label = ifelse(external_gene_name %in% genes_infl & source_sign != "non_sign", 
      external_gene_name,
      NA
    ), fill=source_sign, segment.color=source_sign),
    color="white",
    box.padding = 0.8, 
    max.overlaps = Inf,
    size=3,
    point.padding = 0.2,
    label.size = 0.25,
    min.segment.length = 0
  )+
  theme_figures+
  labs(
    title = "",
    x = expression(log[2]~FC),
    y = expression(-log[10]*italic(P)),
    color = "Dataset"
  ) +
  scale_color_manual(values = colors_volcano, 
                     labels=c("CPS1000", "CLL-map Portal", "not significant"))+
  scale_fill_manual(values = colors_volcano, 
                    labels=c("CPS1000", "CLL-map Portal", "not significant"),
                    guide = "none")+
  scale_discrete_manual("segment.color", values = colors_volcano, guide = "none")+
  scale_x_continuous(limits=c(-10,10))+
  annotate("text", x=5, y=8, label = "Down in DDX3X\nmutants", size = 3)+
  annotate("text", x=-5, y=8, label = "Up in DDX3X\nmutants", size = 3) +
  guides(color = guide_legend(override.aes = list(size = 2)))

Fig5g <- volcano + theme(plot.margin = margin(t=1.5, l=1, b=1.5, unit = "cm"))
Fig5g
## Warning: Removed 31715 rows containing missing values or values outside the scale range
## (`geom_label_repel()`).

#multipanel Fig.5

Fig5b <- Fig5b_top/Fig5b_bottom + plot_layout(ncol=1, guides = 'collect') & theme(legend.position='right', 
                                                                                  plot.margin=margin(t=0.5, b=0.5, l=0.5, unit="cm"))

col1 <- plot_grid(Fig5a, Fig5c, Fig5d, Fig5e,
                  align = "v", axis = c("l", "r"), ncol = 1, rel_heights = c(0.2, 0.12, 0.25, 0.23), 
                   labels=c("A", "C", "D", "E"), label_size=14)

col2 <- plot_grid(Fig5b, Fig5f, Fig5g, NULL, 
                  align = "v", axis = c("r"),
                  ncol = 1, rel_heights = c(0.5, 0.2, 0.25, 0.05), 
                  labels=c("B", "F", "G", ""), label_size=14)
## Warning: Removed 31715 rows containing missing values or values outside the scale range
## (`geom_label_repel()`).
Fig5 <- plot_grid(col1, col2, ncol=2, rel_widths = c(0.6, 0.4)) & theme(plot.margin=margin(l=1,t=1,b=1,r=1, unit="cm"))

ggsave(path=(paste0(filepath,"figures")), filename = "Fig5.svg", width=42, height=59.4, units = "cm")
## Warning: ggrepel: 50 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 55 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave(path=(paste0(filepath,"figures")), filename = "Fig5.pdf", width=42, height=59.4, units = "cm")
## Warning: ggrepel: 50 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 56 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave(path=(paste0(filepath,"figures")), filename = "Fig5.png", width=42, height=59.4, units = "cm", dpi=600) #remove later
## Warning: ggrepel: 50 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 55 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

9.2.3 Heatmaps

9.2.4 Genes of interest: NLRP3 and DDX3Y

10 Longitudinal analysis

10.1 Global distribution

#prepare data, use all samples
treatments_cll <- treatments

#merge tables, limit to CLL samples
merged <- left_join(samples_long, screen_allSamples[, !colnames(screen_allSamples) %in% "patientID"], by = "sampleID") |> filter(patientID %in% screen_long[screen_long$diagnosis == "CLL",]$patientID)

#add date of sampling
#subtract suffixes from sampleID
merged$sampleID <- gsub("-[1234]$", "", merged$sampleID)

#verify sampleID is unique
length(unique(merged$sampleID)) == nrow(merged)
## [1] TRUE
merged_date <- left_join(merged, survival[, colnames(survival) %in% c("sampleID", "sampleDate")], by = "sampleID") |> 
  relocate(sampleDate, .after = sampleID)

#check NA
sum(is.na(merged_date$sampleDate))
## [1] 0
#transfrom in long data frame
merged_table_long <- merged_date |>
  pivot_longer(cols = c(4:length(merged_date)), names_to = "drug", values_to = "viability")

#add concentration level and remove last two characters from each string in 'drug' column
merged_table_long <- merged_table_long |> 
  mutate(conc_index = as.numeric(str_extract(drug, "\\d+$")),
         drug = str_replace(drug, "_\\d+$", ""))

#create means
merged_table_means <- merged_table_long |> 
  group_by(patientID, sampleID, sampleDate, drug) |> 
  summarize(mean_viab=mean(viability))
## `summarise()` has grouped output by 'patientID', 'sampleID', 'sampleDate'. You
## can override using the `.groups` argument.
#convert to wide format
longitudinal_wide <- merged_table_means |> 
  pivot_wider(names_from = drug, values_from = mean_viab)

#calculate pair-wise viability differences
differences_long <- create_mean_differences(treatments, longitudinal_wide)

#summary of sample pairs with treatment annotation and ex vivo viability data
paste0("number of patients: ", length(unique(differences_long$patientID)))
## [1] "number of patients: 110"
summary_table <- differences_long |> 
  group_by(therapy) |> 
  summarize(sample_pairs = n()/63,
            patients = n_distinct(patientID),
            unique_samples = n_distinct(c(sample1, sample2)),
            unique_sample1 = n_distinct(sample1),
            unique_sample2 = n_distinct(sample2)) |> 
  bind_rows(differences_long |> 
              summarise(therapy = "overall",
                        sample_pairs = n()/63,
                        patients = n_distinct(patientID),
                        unique_samples = n_distinct(c(sample1, sample2)),
                        unique_sample1 = n_distinct(sample1),
                        unique_sample2 = n_distinct(sample2)))
summary_table
## # A tibble: 4 × 6
##   therapy  sample_pairs patients unique_samples unique_sample1 unique_sample2
##   <chr>           <dbl>    <int>          <int>          <int>          <int>
## 1 chemo              47       40             87             41             46
## 2 none               65       65            130             65             65
## 3 targeted           20       19             39             19             20
## 4 overall           132      110            242            111            131
treatments_cll |> 
  pivot_longer(cols=c("sample1", "sample2"), names_to = "sample", values_to = "sampleID") |> 
  dplyr::select(patientID, sampleID) |> 
  group_by(patientID) |> 
  distinct(sampleID) |> 
  summarize(n=n()) |> 
  summary()
##   patientID               n      
##  Length:110         Min.   :2.0  
##  Class :character   1st Qu.:2.0  
##  Mode  :character   Median :2.0  
##                     Mean   :2.2  
##                     3rd Qu.:2.0  
##                     Max.   :4.0
#date intervals of samples
df_date <- differences_long |> 
  filter(drug == "Ibrutinib") |> 
  dplyr::select("patientID", "pair", "sample1", "sample2") |> 
  merge(merged_date[, colnames(merged_date) %in% c("sampleID", "sampleDate")], by.x = "sample1", by.y ="sampleID") |> 
  rename(sampleDate1 = sampleDate)

df_date <- df_date |> 
  rename(sampleID = sample2) |> 
  left_join(merged_date[, colnames(merged_date) %in% c("sampleID", "sampleDate")], by ="sampleID") |> 
  rename(sampleDate2 = sampleDate, sample2 = sampleID) |> 
  mutate(diff_date = sampleDate2 - sampleDate1)

df_date$diff_date <- as.numeric(df_date$diff_date)
summary(df_date$diff_date)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       7     224     515     580     892    1757
#sample pairs with more than 0.1 change in viability
differences_long |> 
  filter(abs(difference) > 0.1) |> 
  nrow()/nrow(differences_long)*100
## [1] 4.85
#plot
long_distr <- differences_long |> 
  ggplot(aes(difference))+
    geom_histogram(color="white", fill="grey40", alpha=0.5,bins = 50)+
    geom_vline(xintercept = 0, linetype = "dashed", color = "black")+
  labs(x="Difference in mean viability", y="Count")+
  theme_figures+
  scale_y_continuous(expand = c(0.02,0))+
  scale_x_continuous(expand = c(0.02,0), limits=c(-0.3, 0.3))

Fig6a <- long_distr + theme(plot.margin = margin(t=1, b=1, l=1, unit = "cm"))
Fig6a
## Warning: Removed 129 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).

10.2 Illustration of treatment contexts

#create dataframe for treatment context
df <- data.frame(tx = rep(c("once treated", "start between", 
                            "treatment between", "within treatment", 
                            "end between", "untreated"), times=2), timepoint = c(rep("sample 1", times=6),rep("sample 2", times=6)))

context <- df |> 
  mutate(timepoint = as.numeric(timepoint),
         tx = as.numeric(tx)) |> 
  ggplot(aes(x=timepoint, y=tx))+
  labs(x="", y="", title="Treatment contexts")+
  
  # tx = 3: within treatment
  geom_segment(aes(x = 0.5, y = 6, xend = 3, yend = 6), 
               arrow = arrow(length = unit(0.15, "cm"), type = "closed"), color = "salmon", size = 0.5)+
  geom_segment(aes(x = 0, y = 6, xend = 0.5, yend = 6), 
               arrow = arrow(length = unit(0.15, "cm"), type = "closed"), color = "black", size = 0.5)+
  
  # tx = 2: untreated
  geom_segment(aes(x = 0, y = 5, xend = 3, yend = 5), 
               arrow = arrow(length = unit(0.15, "cm"), type = "closed"), color = "black", size = 0.5)+

  # tx = 4: treatment between
  geom_segment(aes(x = 1.8, y = 4, xend = 3, yend = 4), 
               arrow = arrow(length = unit(0.15, "cm"), type = "closed"), color = "black", size = 0.5)+
  geom_segment(aes(x = 1.2, y = 4, xend = 1.8, yend = 4), 
               arrow = arrow(length = unit(0.15, "cm"), type = "closed"), color = "salmon", size = 0.5)+
  geom_segment(aes(x = 0, y = 4, xend = 1.2, yend = 4), 
               arrow = arrow(length = unit(0.15, "cm"), type = "closed"), color = "black", size = 0.5)+

  # tx = 5: start between
  geom_segment(aes(x = 1.5, y = 3, xend = 3, yend = 3), 
               arrow = arrow(length = unit(0.15, "cm"), type = "closed"), color = "salmon", size = 0.5)+
  geom_segment(aes(x = 0, y = 3, xend = 1.5, yend = 3), 
               arrow = arrow(length = unit(0.15, "cm"), type = "closed"), color = "black", size = 0.5)+
  
  # tx = 6: once treated
  geom_segment(aes(x = 0.8, y = 2, xend = 3, yend = 2), 
               arrow = arrow(length = unit(0.15, "cm"), type = "closed"), color = "black", size = 0.5)+
  geom_segment(aes(x = 0, y = 2, xend = 0.8, yend = 2), 
               arrow = arrow(length = unit(0.15, "cm"), type = "closed"), color = "salmon", size = 0.5)+
  
  # tx = 1: end between
  geom_segment(aes(x = 1.5, y = 1, xend = 3, yend = 1), 
               arrow = arrow(length = unit(0.15, "cm"), type = "closed"), color = "black", size = 0.5)+
  geom_segment(aes(x = 0, y = 1, xend = 1.5, yend = 1), 
               arrow = arrow(length = unit(0.15, "cm"), type = "closed"), color = "salmon", size = 0.5)+
  
  #sampling timepoints
  geom_segment(aes(x = 1, y = 0.5, xend = 1, yend = 6.5), color = "grey", linetype = "dashed", size = 0.5)+
  geom_segment(aes(x = 2, y = 0.5, xend = 2, yend = 6.5), color = "grey", linetype = "dashed", size = 0.5)+
  
  scale_x_continuous(breaks = c(1, 2), labels = c("First\nsample", "Second\nsample"))+
  scale_y_continuous(breaks = c(seq(1,6,1)), labels = sort(unique(df$tx)))+
  theme_figures_border

Fig6b <- context #+ theme(plot.margin = margin(t=2, l=1, r=1, b=1, unit = "cm"))
Fig6b

10.3 Longitudinal drug response evoluation at the single concentration level

#use individual drug responses
merged_table_wide <- merged_table_long |> 
  pivot_wider(names_from = drug, values_from = viability)

#calculate pair-wise viability differences
differences_long_allconc <- create_conc_differences(treatments_cll, merged_table_wide)
## Warning in left_join(., viab_df, by = c(sample1 = "sampleID")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 71 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
## Warning in left_join(., viab_df, by = c(sample2 = "sampleID")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 11 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
#add concentrations
differences_long_allconc <- differences_long_allconc |> 
  left_join(screen_allSamples_long |> 
              mutate(conc_index = as.numeric(str_extract(drug, "\\d+$")), drug = str_replace(drug, "_\\d+$", "")) |> 
              distinct(drug, conc_index), by = c("drug", "conc_index"))  |> 
  drop_na(conc_index)

#t-test with FDR within status and therapy
df_ttest_allconc <- differences_long_allconc |> 
  group_by(drug, conc_index, status, therapy) |> 
  mutate(pval = t.test(sample1_value, sample2_value, paired = TRUE)$p.value) |> 
  ungroup() |>
  group_by(status, therapy) |> 
  mutate(padj = p.adjust(pval, method="BH"))

#order drugs by their association similarity (hierarchical clustering)
pMat <- df_ttest_allconc |> 
  filter(padj < 0.1) |>
  group_by(drug, conc_index, therapy, status) |> 
  mutate(sign_log_pval = -log10(mean(pval)) * sign(mean(percent_change))) |>
  ungroup() |> 
  dplyr::select(drug, conc_index, therapy, status, sign_log_pval) |>
  # Create a unique identifier for each condition
  unite("condition", therapy, status, conc_index, sep = "_") |> 
  pivot_wider(names_from = condition, 
              values_from = sign_log_pval,
              values_fn = median) |>
  as.data.frame() |>
  column_to_rownames("drug") |> 
  as.matrix()

#replace NAs with 0 (no significant effect)
pMat[is.na(pMat)] <- 0

#perform hierarchical clustering
set.seed(123)
drug_clusters <- hclust(dist(pMat), method = "ward.D2")
drug_order <- rownames(pMat)[drug_clusters$order]

df_ttest_allconc$drug <- factor(df_ttest_allconc$drug, levels = drug_order)

#rename variables
df_ttest_allconc$therapy <- recode(df_ttest_allconc$therapy,
                                   "chemo" = "Chemo-Immuno",
                                   "targeted" = "Targeted", 
                                   "none" = "None")

#plot heatmap
pval_hmap <- df_ttest_allconc |> 
  filter(padj <0.1) |>
  group_by(drug, conc_index, therapy, status) |> 
  mutate(sign_log_pval = -log10(mean(pval)) * sign(mean(percent_change))) |>  
  ggplot(aes(x=conc_index, y=drug, fill=sign_log_pval))+
  geom_tile(size = 0.2, color = "white")+
  facet_wrap(therapy~status, ncol=3)+
  scale_fill_gradient2(low="blue", high="red", limits=c(-4, 4))+
  labs(y="", x="Concentration index", fill = "**-log<sub>10</sub>*P*<br>with direction**") +
  theme_figures_facet_angle45_x_legend+
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5, vjust = 0),
        legend.title = ggtext::element_markdown())

Fig6c <- pval_hmap + theme(plot.margin = margin(l=1, t=1, b=0.5, unit = "cm"))
Fig6c

#percentage of significant changes by therapy condition
sig_summary <- df_ttest_allconc %>%
  group_by(therapy) %>%
  summarise(
    total_tests = n(),
    significant_tests = sum(padj < 0.1, na.rm = TRUE),
    percent_significant = (significant_tests / total_tests) * 100,
    .groups = 'drop'
  )

#calculate chi square test for treatment
chi_test <- df_ttest_allconc |>
  mutate(sign = ifelse(padj < 0.1, "sign", "not_sign")) |>
  with(chisq.test(table(therapy, sign)))

p_value <- chi_test$p.value
chi_stat <- chi_test$statistic

#plot
colors_tx <- c(colorRampPalette(brewer.pal(3, 'Set2'))(3), c(unique(df_ttest_allconc$therapy)))

chi_tx <- sig_summary |> 
  ggplot(aes(x=therapy, y=percent_significant, fill=therapy))+
  geom_bar(stat="identity")+
  theme_figures+
  theme(legend.position = "none")+
  labs(title="All treatments", fill="Drug response\nevolution",
       x="", y="% sampling pairs with sign. change (FDR <0.1)")+
  scale_x_discrete(labels=c("Chemo-immuno", "None", "Targeted"))+
  annotate(
    "richtext",
    x = length(unique(sig_summary$therapy)) / 2 + 0.5,
    y = max(sig_summary$percent_significant)+1.5,
    label = glue::glue(
      "*X*<sup>2</sup> = {round(chi_stat, 3)}, *P* = {format.pval(p_value, digits = 3)}"
    ),
    hjust = 0.5,
    vjust = 1,
    size = 3,
    fill = NA,
    label.color = NA
  )+
  scale_fill_manual(values=colors_tx)+
  ylim(0,15)

FigS8a <- chi_tx + theme(plot.margin = margin(l = 1, r = 1, t=1, unit = "cm"))

10.4 Longitudinal changes in patients treated with targeted therapies

#create list of sign. drugs in targeted, start between
targ_list <- df_ttest_allconc |> 
  filter(padj <0.1, therapy == "Targeted", status == "start between") |> 
  group_by(drug) |> 
  mutate(median_diff = median(difference)) |> 
  arrange(median_diff)

targ_list$drug <- factor(targ_list$drug, levels = unique(targ_list$drug))

#use differences in mean viability
median_ranking_targeted <- differences_long |> 
  filter(drug %in% unique(targ_list$drug), therapy == "targeted", status == "start between") |> 
  group_by(drug) |> 
  mutate(median_diff = median(difference)) |> 
  arrange(median_diff)

median_ranking_order <- unique(median_ranking_targeted$drug)

#prepare pathway labeling
pathway_annotation <- differences_long |> 
  filter(drug %in% unique(targ_list$drug), 
         therapy == "targeted", 
         status == "start between") |> 
  mutate(drug = factor(drug, levels = median_ranking_order)) |>
  merge(drugs[,c("Drug", "Pathway_mod2")], by.x="drug", by.y="Drug")

median_ranking_order <- unique(median_ranking_targeted$drug)
pathway_annotation$Pathway_mod2 <- factor(pathway_annotation$Pathway_mod2, levels = order_pway2)

#plot
long_mean_viab <- pathway_annotation |>
  ggplot(aes(y = difference, x = drug, fill=Pathway_mod2)) + 
  geom_boxplot(alpha = 0.8, outliers = TRUE) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed", linewidth =0.5) +
  scale_fill_manual(values=colors_pathways_mod2)+
  theme_figures_angle45_x+
  labs(fill = "Pathway", y = "Difference in mean viability", x = "")

Fig6d <- long_mean_viab + theme(plot.margin = margin(t=1, l=1, unit = "cm"))
Fig6d

10.4.1 Drugs of interest

#add drug concentration
differences_long_allconc <- differences_long_allconc |> 
  left_join(drugs |> 
              dplyr::select(Drug, Group), by = c("drug" = "Drug")) |> 
  rowwise() |> 
  mutate(concentration = drug_list[[Group]][conc_index]) |> 
  ungroup()
  

#plot OTX015 and TW-37
otx_tw <- differences_long_allconc |> 
  mutate(drug = case_when(drug == "OTX015" ~ "OTX015 (BET)",
                          drug == "TW-37" ~ "TW-37 (BCL2/MCL1)",
                          TRUE ~ drug)) |> 
  pivot_longer(cols = c(sample1_value, sample2_value), names_to = "timepoint", values_to = "viability") |> 
  filter(drug %in% c("OTX015 (BET)","TW-37 (BCL2/MCL1)"), status == "start between", therapy == "targeted") |> 
  ggplot(aes(x=factor(concentration), y=viability, color=timepoint))+
  geom_boxplot(alpha = 0.3, width = 0.5, position=position_dodge(0.75), outlier.shape = NA)+
  geom_point(aes(color = timepoint), alpha = 0.25, 
           position = position_jitterdodge(dodge.width = 0.8, jitter.width = 0.2))+ 
  labs(y = "Viability", color= "Sampling")+ 
  facet_wrap(~drug, ncol=2) + 
  stat_compare_means(aes(group = timepoint), 
                     method = "t.test",
                     paired = TRUE,
                     label = "p.signif",
                     label.y = 1.2, 
                     size=3)+
  labs(x = bquote("Concentration ("*mu*"M)"))+
  scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.38)) +
  scale_color_manual(labels = c("sample1_value" = "Before treatment", 
                                  "sample2_value" = "On treatment"),
                       values = colors_treatment)+
  scale_x_discrete(labels = c("0.016", "0.08", "0.4", "2", "10"))+
  guides(color = guide_legend(override.aes = list(label = c(""))))+
  theme_figures_facet_angle45_x_legend

Fig6e <- otx_tw + theme(plot.margin = margin(l=1, t=1, unit = "cm"))
Fig6e

#plot BCR inhibitors
bcr <- differences_long_allconc |> 
  pivot_longer(cols = c(sample1_value, sample2_value), names_to = "timepoint", values_to = "viability") |> 
  filter(drug %in% c("Dasatinib","Ganetespib", "Idelalisib", "AZD8055", "Cobimetinib", "Ibrutinib"), 
         status == "start between", therapy == "targeted") |> 
  mutate(drug = case_when(drug == "AZD8055" ~ "AZD8055 (mTOR)",
                          drug == "Cobimetinib" ~ "Cobimetinib (MEK)",
                          drug == "Dasatinib" ~ "Dasatinib (ABL/LYN)",
                          drug == "Idelalisib" ~ "Idelalisib (PI3K)",
                          drug == "Ganetespib" ~ "Ganetespib (HSP90)",
                          drug == "Ibrutinib" ~ "Ibrutinib (BTK)",
                          TRUE ~ drug)) |> 
  ggplot(aes(x=factor(concentration), y=viability, color=timepoint))+
  geom_boxplot(alpha = 1, width = 0.5, position=position_dodge(0.75), outlier.shape = NA) + 
  geom_jitter(alpha = 0.3, position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.1)) + 
  theme_figures_facet_angle45_x_legend+
  scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.2)) +
  guides(color = guide_legend(override.aes = list(label = ""))) +
  facet_wrap(~drug)+
  stat_compare_means(aes(group = timepoint), 
                     method = "t.test",
                     paired = TRUE,
                     label = "p.signif",
                     label.y = 1.1, 
                     size=3)+
  labs(y = "Viability", color= "Sampling", x = bquote("Concentration ("*mu*"M)"))+
  scale_color_manual(labels = c("sample1_value" = "Before treatment", 
                                  "sample2_value" = "On treatment"),
                       values = colors_treatment)

SFig12c <- bcr + theme(plot.margin = margin(t=1, l=1, unit = "cm"))


#plot MCL1 and BCL2 inhibitors
mcl_bcl <- differences_long_allconc |> 
  pivot_longer(cols = c(sample1_value, sample2_value), names_to = "timepoint", values_to = "viability") |> 
  filter(drug %in% c("Venetoclax","A-1210477"), status == "start between", therapy == "targeted") |> 
  mutate(drug = case_when(drug == "Venetoclax" ~ "Venetoclax (BCL2)",
                          drug == "A-1210477" ~ "A-1210477 (MCL1)",
                          TRUE ~ drug)) |> 
  ggplot(aes(x=factor(conc_index, levels = sort(unique(conc_index), decreasing = TRUE)), y=viability, color=timepoint))+
  geom_boxplot(alpha = 1, width = 0.5, position=position_dodge(0.75), outlier.shape = NA) + 
  geom_jitter(alpha = 0.3, position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.1)) + 
  theme_figures_facet_angle45_x_legend+
  theme(axis.text.x = element_text(angle=0, hjust=0.5, vjust=0))+
  scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.2)) +
  guides(color = guide_legend(override.aes = list(label = ""))) +
  facet_wrap(~drug, nrow=2)+
  stat_compare_means(aes(group = timepoint), 
                     method = "t.test",
                     paired = TRUE,
                     label = "p.signif",
                     label.y = 1.1, 
                     size=3)+
  labs(y = "Viability", color= "Sampling", x = bquote("Concentration index"))+
  scale_color_manual(labels = c("sample1_value" = "Before treatment", 
                                  "sample2_value" = "On treatment"),
                       values = colors_treatment)

SFig12d <- mcl_bcl + theme(plot.margin = margin(t=1, l=1, r=1, b=0.5, unit = "cm"))
#Fig6 multipanel

col1 <- plot_grid(Fig6a, Fig6b, 
                  align = "v", axis = c("r"), 
                  ncol = 1, rel_heights = c(0.4, 0.4), 
                  labels=c("A", "B"), label_size=14)
## Warning: Removed 129 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
## Warning in geom_segment(aes(x = 0.5, y = 6, xend = 3, yend = 6), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0, y = 6, xend = 0.5, yend = 6), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0, y = 5, xend = 3, yend = 5), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 1.8, y = 4, xend = 3, yend = 4), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 1.2, y = 4, xend = 1.8, yend = 4), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0, y = 4, xend = 1.2, yend = 4), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 1.5, y = 3, xend = 3, yend = 3), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0, y = 3, xend = 1.5, yend = 3), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0.8, y = 2, xend = 3, yend = 2), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0, y = 2, xend = 0.8, yend = 2), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 1.5, y = 1, xend = 3, yend = 1), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 0, y = 1, xend = 1.5, yend = 1), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 1, y = 0.5, xend = 1, yend = 6.5), color = "grey", : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_segment(aes(x = 2, y = 0.5, xend = 2, yend = 6.5), color = "grey", : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
col2 <- plot_grid(Fig6c, ncol = 1, 
                  #rel_heights = c(0.8, 0.2),
                  labels=c("C", ""), label_size=14)

col3 <- plot_grid(Fig6d,Fig6e, ncol = 1, align = "v", axis = c("l", "r"),
                  rel_heights = c(0.55, 0.45), 
                  labels=c("D", "E"), label_size=14)

top <- plot_grid(col1, col2, col3, ncol=3, 
                 align = "h", axis = c("t", "b"),
                 rel_widths = c(0.2, 0.35, 0.5))

Fig6 <- plot_grid(top, NULL, nrow =2, rel_heights = c(0.25, 0.75)) & theme(plot.margin=margin(r=1,l=1,t=1,b=1,unit="cm"))

ggsave(path=(paste0(filepath,"figures")), filename = "Fig6.svg", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "Fig6.pdf", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "Fig6.png", width=42, height=59.4, units = "cm", dpi=600)
#FigS8 multipanel

r1c1 <- plot_grid(FigS8a, NULL, 
                  align = "h", 
                  axis = "tb", 
                  ncol = 2, 
                  rel_widths = c(1, 1), 
                  labels = c("A", ""), 
                  label_size = 14)

row1<- plot_grid(r1c1, NULL, ncol = 2, rel_widths = c(0.5, 0.5))

row2 <- plot_grid(SFig12c, SFig12d, align = "h", axis = c("t", "b"), ncol = 2, rel_widths = c(0.65, 0.35), 
               labels=c("B", "C"), label_size=14)

SFig12 <- plot_grid(row1, row2, NULL, nrow =3, 
                    align = "v", axis = c("l"), rel_heights = c(0.15, 0.25, 0.6)) & theme(plot.margin=margin(l=1,t=1,b=1,r=1,unit="cm"))

ggsave(path=(paste0(filepath,"figures")), filename = "FigS8.svg", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "FigS8.pdf", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "FigS8.png", width=42, height=59.4, units = "cm", dpi=600)

11 Correlation of drug response metrics with clinical outcomes

## `summarise()` has grouped output by 'patientID', 'drug'. You can override using
## the `.groups` argument.
## `summarise()` has grouped output by 'diagnosis', 'patientID'. You can override
## using the `.groups` argument.
## `summarise()` has grouped output by 'diagnosis'. You can override using the
## `.groups` argument.

#multipanel
Fig7a <- p1+p2 + plot_annotation("CLL", theme = theme(
  plot.margin = margin(t=1, b=1, l=1, unit = "cm"),
  plot.title = element_text(hjust=0.05, size=8, face="bold"))) +
  plot_layout(guides = "collect")
Fig7a
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Fig7b <- p3+p4 + plot_annotation("AML", theme = theme(
  plot.margin = margin(t=1, b=1, l=1, unit = "cm"),
  plot.title = element_text(hjust=0.05, size=8, face="bold"))) +
  plot_layout(guides = "collect")
Fig7b
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`sta_corr()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## Validation in external dataset

#load data from SMART trial
download.file("https://github.com/PeterBruch/SMARTrial/raw/main/data/SMARTrial_data.RData", 
              destfile = paste0(filepath, "validation/SMARTrial_data.RData"),
              mode = "wb")

load(paste0(filepath, "validation/SMARTrial_data.RData"))

#filter for drugs tested in both datasets
smart <- data_all |> 
  filter(Drug %in% drugs$Drug, diagnosis == "AML")

#create mean viability
smart_means <- smart |> 
  group_by(Drug) |> 
  summarize(viab_mean = mean(Drug_response_mean, na.rm=TRUE)) |> 
  merge(AML_outcomes[,c("Drug", "orr", "crr", "nr_pat", "Category")], by="Drug")

smart_aml_orr <- smart_means |> 
  ggplot(aes(x=viab_mean, y=orr)) +
  geom_point(aes(size=nr_pat, color=Category), alpha=0.8) +
  geom_text_repel(aes(label=Drug), size=3, color="black", 
      box.padding = 0.5,
      point.padding = 0.75,
      min.segment.length = 1,
      force = 1.5)+
  stat_correlation(mapping = use_label("R", "R2", "P", "n"), label.x = "left", size=3)+
  theme_figures_border+
    labs(title = "ORR ~ Mean viability", x = "Mean viability", 
         y = "ORR", color="Drug type", size="Nr. of patients\nin clinical trial")+
  scale_color_manual(values=colors_drug_type, labels=labels_drug_type)+
  ylim(0,100)
  
smart_aml_crr <- smart_means |> 
  ggplot(aes(x=viab_mean, y=crr)) +
  geom_point(aes(size=nr_pat, color=Category), alpha=0.8) +
  geom_text_repel(aes(label=Drug), size=3, color="black",
      box.padding = 0.5,
      point.padding = 0.5,
      max.overlaps=5,
      force = 1.5)+
  stat_correlation(mapping = use_label("R", "R2", "P", "n"), label.x = "left", size=3)+
  theme_figures_border+
  labs(title = "CRR ~ Mean viability", x = "Mean viability", 
       y = "CRR", color="Drug type", size="Nr. of patients\nin clinical trial")+
  scale_color_manual(values=colors_drug_type, labels=labels_drug_type)+
  ylim(0, 50)

FigS10c <- smart_aml_orr+smart_aml_crr + plot_annotation("AML (Liebers et al. 2023)", theme = theme(
  plot.margin = margin(t=1, b=1, l=1, unit = "cm"),
  plot.title = element_text(hjust=0.05, size=8, face="bold"))) +
  plot_layout(guides = "collect")
FigS10c

11.1 Illustration of drug-response metrics

11.2 Correlation coefficients for AML and CLL

#create boxplots for correlations coefficients of drug metrics in AML and CLL
#function to calculate correlations
compute_cor <- function(data, parameters, outcome, diagnosis) {
  cor_data <- sapply(parameters, function(par) {
    cor(data[[outcome]], data[[par]], use = "pairwise.complete.obs", method = "pearson")
  })
  
  data.frame(
    parameter = parameters,
    cor = cor_data,
    diagnosis = diagnosis,
    outcome = outcome
  )
}

parm<- c("HS_mean", "Einf_mean", "pIC50_mean", "R2_mean", "AUC_mean", "viab_mean")

CLL_ORR <- compute_cor(CLL_outcomes, parm, "orr", "CLL")
CLL_CRR <- compute_cor(CLL_outcomes, parm, "crr",  "CLL")

AML_ORR <- compute_cor(AML_outcomes, parm, "orr", "AML")
AML_CRR <- compute_cor(AML_outcomes, parm, "crr",  "AML")

cor_combined <- bind_rows(CLL_ORR, CLL_CRR, AML_ORR, AML_CRR)

#plot
par_list <- cor_combined |> 
  filter(parameter != "pF_mean", diagnosis == "CLL", outcome == "crr") |> 
  arrange(abs(cor)) |> 
  pull(parameter)
  
cor_combined$parameter <- factor(cor_combined$parameter, levels = par_list)  

Fig7d <- cor_combined |> 
filter(str_detect(parameter, "mean")) |> 
  mutate(direction = ifelse(sign(cor) >0, "positive", "negative")) |> 
  filter(parameter != "pF_mean") |> 
  ggplot(aes(y=parameter, x=abs(cor), fill=diagnosis))+
  geom_col(aes(alpha=outcome), position = position_dodge(width=0.8), width=0.8)+
  facet_wrap(~diagnosis, ncol=2)+
  labs(title="", y="Parameter (mean)", x="Absolute Pearson correlation coefficient",
  fill = "Diagnosis", alpha="Outcome")+
  theme_figures_facet_angle60_x+
  scale_fill_manual(values=colors_diag, guide="none")+
  scale_alpha_manual(values=c(1, 0.5), labels = c("CRR", "ORR"))+
  xlim(0,1)+
  scale_y_discrete(labels = c("Hill coefficient", 
                              expression(pIC[50]),  # subscript
                              expression(E[inf]),   # subscript
                              expression(R^2),      # superscript
                              "AUC",
                              "Mean viability"))
  
Fig7d <- Fig7d + theme(plot.margin = margin(t=1, b=1, l=1, unit = "cm"))
Fig7d

11.3 Time-to-event analysis

#prepare data: use mean viability in CLL
screen_means_cll <- screen_means |> 
  filter(diagnosis == "CLL")

#merge with survival data, add age
surv_combined <- left_join(
  screen_means_cll,
  screen_long |> distinct(patientID, sampleID),  # Remove duplicates
  by = "patientID"
) |> 
  left_join(survival |> rename(patientID = patID), 
            by = c("sampleID", "patientID")) |> 
  left_join(age |> dplyr::select(-sampleDate), 
            by = c("sampleID", "patientID"))

#merge with genomic aberrations
surv_combined <- left_join(surv_combined, metadata |> 
                             dplyr::select(-diagnosis, -treatment), by = c("patientID" = "Patient.ID")) |> 
  mutate(IGHV.status = case_when(IGHV.status == "M" ~ "1",
                                 IGHV.status == "U" ~ "0",
                                 TRUE ~ IGHV.status))

#summary
length(unique(surv_combined$patientID))
## [1] 275
surv_combined |> 
  group_by(treatedAfter) |> 
  summarize(n=n()/63)
## # A tibble: 3 × 2
##   treatedAfter     n
##   <lgl>        <dbl>
## 1 FALSE          145
## 2 TRUE            99
## 3 NA              31
#perform cox regression for each drug and clinical feature

#univariate analysis for OS: drugs, stratified by IGHV
resOS_uni <- filter(surv_combined, !is.na(OS), !is.na(IGHV.status)) %>%
  group_by(drug, IGHV.status) %>%
  do(com_uni(.$mean_viab, .$OS, .$died, TRUE)) %>% ungroup() %>%
  arrange(p) %>% mutate(p.adj = p.adjust(p, method = "BH")) %>%
  mutate(Endpoint = "OS")

##univariate analysis for TTT: drugs, stratified by IGHV
resTTT_uni <- filter(surv_combined, !is.na(TTT), !is.na(IGHV.status)) %>%
  group_by(drug, IGHV.status) %>%
  do(com_uni(.$mean_viab, .$TTT, .$treatedAfter, TRUE)) %>% ungroup() %>%
  arrange(p) %>% mutate(p.adj = p.adjust(p, method = "BH")) %>%
  mutate(Endpoint = "TTT")

#plot Hazard ratios and p values
plotTab_uni <- bind_rows(resOS_uni, resTTT_uni) %>%
  filter(drug %in% unique(filter(.,p.adj < 0.05)$drug))

cox_uni_strat <- plotTab_uni |> 
  mutate(IGHV.status = ifelse(IGHV.status == "1", "M-CLL", "U-CLL")) |> 
  ggplot(aes(x=drug, y = HR, col = Endpoint, dodge = Endpoint)) + 
  geom_hline(yintercept = 1, linetype = "dotted") +
  geom_point(position = position_dodge(width=0.75)) +
  geom_errorbar(position = position_dodge(width =0.75), 
                aes(ymin = lower, ymax = higher), width = 0.3) + 
  geom_text(position = position_dodge(width =0.75), size = 3,
            aes(y = higher + 0.6, 
                label = ifelse(p < 0.001, 
                               "italic(P)<0.001", 
                               paste0("italic(P)==", sprintf("%1.3f", p)))),
            parse = TRUE) +
  facet_wrap(~IGHV.status, scales = "free") + 
  labs(y="Hazard ratio", x="")+
  coord_flip() + 
  theme_figures_facet+
  scale_color_manual(values=colors_cox)+
  guides(color = guide_legend(override.aes = aes(label = "")))+
  ylim(0,4)

FigS10k <- cox_uni_strat + theme(plot.margin = margin(t=1, l=1, r=1, b=1, unit = "cm"))
FigS10k

#multivariate analysis for OS, combine del17p and TP53
resOS <- surv_combined |> 
  mutate(TP53_del17p = ifelse(TP53==1|del17p==1, 1,0)) |> 
  filter(!is.na(OS), !is.na(IGHV.status), !is.na(TP53_del17p), !is.na(age)) %>%
  group_by(drug) %>%
  do(com(.$mean_viab, .$IGHV.status, .$TP53_del17p, .$age, .$OS, .$died, TRUE)) %>% ungroup() %>%
  arrange(p) %>% mutate(p.adj = p.adjust(p, method = "BH")) %>%
  mutate(Endpoint = "OS")

#multivariate analysis for TTT, combine del17p and TP53
resTTT <- surv_combined |> 
  mutate(TP53_del17p = ifelse(TP53==1|del17p==1, 1,0)) |> 
  filter(!is.na(TTT), !is.na(IGHV.status), !is.na(TP53_del17p), !is.na(age)) %>%
  group_by(drug) %>%
  do(com(.$mean_viab, .$IGHV.status, .$TP53_del17p,.$age,.$TTT, .$treatedAfter, TRUE)) %>% ungroup() %>%
  arrange(p) %>% mutate(p.adj = p.adjust(p, method = "BH")) %>%
  mutate(Endpoint = "TTT")

#plot Hazard ratios and p values for ibrutinib
plotTab <- bind_rows(resOS, resTTT) %>%
  filter(drug %in% unique(filter(.,p.adj < 0.05)$drug))

multi_cox_ibr <- plotTab |> 
  filter(drug == "Ibrutinib") |> 
  ggplot(aes(x=variable, y = HR, col = Endpoint)) + 
  geom_hline(yintercept = 1, linetype = "dotted") +
  geom_point(position = position_dodge(width=0.75)) +
  geom_errorbar(position = position_dodge(width =0.75), 
                aes(ymin = lower, ymax = higher), width = 0.3) + 
  geom_text(position = position_dodge(width =0.75), size = 3,
            aes(y = higher + 0.75, 
                label = paste0("italic(P)==", sprintf("%1.3f", p))),
            parse = TRUE) +
  labs(y="Hazard ratio", x="")+
  coord_flip() + 
  theme_figures+
  scale_x_discrete(labels = c("TP53_del17p" = "TP53/del(17p)",
                              "response" = "Ex vivo\nresistance to\nibrutinib",
                              "ighv" = "IGHV", 
                              "age" = "Age at\nsampling"))+
  scale_color_manual(values=colors_cox)+
  guides(color = guide_legend(override.aes = aes(label = "")))+
  ylim(0,6)

Fig7e <- multi_cox_ibr + theme(plot.margin = margin(t=1, l=1, b=1, unit = "cm"))
Fig7e

11.3.1 Survival

#survival curves
colors_surv_ibr <- setNames(c("#CA054D", "#B96D40", "#A4D4B4", "#3B1C32"), c("0,0", "0,1", "1,0", "1,1"))

labels_surv_ibr <- c("U-CLL, low ibrutinib viability", "U-CLL, high ibrutinib viability", 
                              "M-CLL, low ibrutinib viability", "M-CLL, high ibrutinib viability")

colors_surv_ibr_p <- setNames(c("#CA054D", "#B96D40", "#A4D4B4", "#3B1C32"), c("U_low", "U_high", "M_low", "M_high"))

#maxstat
km_surv <- surv_combined |> 
  filter(!is.na(IGHV.status), drug == "Ibrutinib")

#OS
res.cut.os <- surv_cutpoint(km_surv, time = "OS", event = "died",
                         variables = c("mean_viab"))

summary(res.cut.os)
##           cutpoint statistic
## mean_viab    0.825      4.55
os_value <- summary(res.cut.os)$cutpoint
res.cat.os <- surv_categorize(res.cut.os)

fit_os <- survfit(Surv(OS, died) ~ mean_viab, data = res.cat.os)

#TTT
res.cut.ttt <- surv_cutpoint(km_surv, time = "TTT", event = "treatment",
                         variables = c("mean_viab"))

summary(res.cut.ttt)
##           cutpoint statistic
## mean_viab    0.895      6.15
ttt_value <- summary(res.cut.ttt)$cutpoint
res.cat.ttt <- surv_categorize(res.cut.ttt)

fit_ttt <- survfit(Surv(TTT, treatment) ~ mean_viab, data = res.cat.ttt)

#plot OS Kaplan-meier curve based on cut-off
km_surv <- km_surv |> 
  mutate(ibr_resistance_os = ifelse(mean_viab < os_value, 0, 1), 
         ibr_resistance_ttt = ifelse(mean_viab < ttt_value, 0, 1))
  
os <- survfit2(Surv(OS, died) ~ ibr_resistance_os+IGHV.status, data = km_surv)

pval <- sub("^p", "", survfit2_p(os))

km_os <- os |>
  ggsurvfit() +
  labs(x = "Years", y = "Overall survival", title = "Overall survival") + 
  add_censor_mark(size = 1, alpha = 0.5) +
  add_risktable(risktable_stats = "n.risk", size = 3) +
  add_risktable_strata_symbol(symbol = "\U2500", size = 10) +
  scale_ggsurvfit() +
  labs(x="Years from sampling")+
  theme_figures+
  theme(legend.position = "bottom")+
  scale_color_manual(labels=c("U-CLL, low ibrutinib viability", "U-CLL, high ibrutinib viability", 
                              "M-CLL, low ibrutinib viability", "M-CLL, high ibrutinib viability"), 
                     values = c("#CA054D", "#B96D40", "#A4D4B4", "#3B1C32"))+
  annotate(
    "text",
    x = 2,
    y = 0.05,
    label = paste0("'Log-rank'~italic(P)", pval),
    parse = TRUE,
    hjust = 1,
    vjust = 0,
    size = 3
  )+
  guides(color = guide_legend(ncol = 2))

Fig7d1 <- km_os

#plot TTT Kaplan-meier curve based on cut-off
ttt <- survfit2(Surv(TTT) ~ ibr_resistance_ttt+IGHV.status, data = km_surv)

pval_ttt <- sub("^p", "", survfit2_p(ttt))

#plot
km_ttt <- ttt |> 
  ggsurvfit() +
  labs(x = "Years", y = "Treatment-free survival", title = "Time to treatment") + 
  add_censor_mark(size = 1, alpha = 0.5) +
  add_risktable(risktable_stats = "n.risk", size = 3) +
  add_risktable_strata_symbol(symbol = "\U2500", size = 10) +
  scale_ggsurvfit() +
  labs(x="Years from sampling")+
  theme_figures+
  theme(legend.position = "bottom")+
  scale_color_manual(labels=c("U-CLL, low ibrutinib viability", "U-CLL, high ibrutinib viability", 
                              "M-CLL, low ibrutinib viability", "M-CLL, high ibrutinib viability"), 
                     values = c("#CA054D", "#B96D40", "#A4D4B4", "#3B1C32"))+
  annotate(
    "text",
    x = 2,
    y = 0.05,
    label = paste0("'Log-rank'~italic(P)", pval_ttt),
    parse = TRUE,
    hjust = 1,
    vjust = 0,
    size = 3
  )+
  guides(color = guide_legend(ncol = 2))

Fig7d2 <- km_ttt

#####

#combine with p values: not yet included
#ttt
#create log-rank p values for all comparisons
km_surv_ttt <- km_surv %>%
  mutate(
    IGHV = ifelse(IGHV.status == "1", "M", "U"),
    ibr_mean = ifelse(ibr_resistance_ttt == "1", "high", "low"),
    group = paste(IGHV, ibr_mean, sep = "_")
  )

#create pairwise group combinations
pairwise_df_ttt <- combn(unique(km_surv_ttt$group), 2, simplify = FALSE) %>%
  map_dfr(~{
    g1 <- .x[1]
    g2 <- .x[2]

    d <- km_surv_ttt %>% filter(group %in% c(g1, g2))
    sd <- survdiff(Surv(TTT) ~ group, data = d)

    tibble(
      group1 = g1,
      group2 = g2,
      pval   = pchisq(sd$chisq, df = 1, lower.tail = FALSE)
    )
  })

#format p-values
format_pval <- function(p) {
  ifelse(p < 0.001, "<0.001",
    ifelse(p < 0.01,  sprintf("%.3f", p),
      ifelse(p < 0.1,   sprintf("%.2f", p),
                         sprintf("%.2f", p))))
}

#keep only upper triangle
pairwise_df_ttt$pval <- format_pval(pairwise_df_ttt$pval)
pairwise_df_ttt$group1_copy <- pairwise_df_ttt$group1
pairwise_df_ttt$group2_copy <- pairwise_df_ttt$group2

pairwise_df_ttt$group1 <- factor(pairwise_df_ttt$group1, levels = c("U_low", "M_low", "U_high"))
pairwise_df_ttt$group2 <- factor(pairwise_df_ttt$group2, levels = c("M_high", "M_low", "U_high"))


p <- ggplot(pairwise_df_ttt, aes(x = group1, y = group2)) +
  geom_tile(fill = "white", color = "black", linewidth = 0.5) +
  geom_text(aes(label = pval), size = 3) +
  theme_nothing()+
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0))

#add color bar label below
p2 <- p + geom_tile(aes(y = 3.75, fill = group1_copy), height = 0.25, width = 1) + 
  geom_tile(aes(x = 0.3, fill = group2_copy), height = 1, width = 0.2) + 
  coord_cartesian(clip = "off")+
  scale_fill_manual(values=colors_surv_ibr_p)

#repeat für os
#create log-rank p values for all comparisons
km_surv_os <- km_surv %>%
  mutate(
    IGHV = ifelse(IGHV.status == "1", "M", "U"),
    ibr_mean = ifelse(ibr_resistance_os == "1", "high", "low"),
    group = paste(IGHV, ibr_mean, sep = "_")
  )

#create pairwise group combinations
pairwise_df_os <- combn(unique(km_surv_os$group), 2, simplify = FALSE) %>%
  map_dfr(~{
    g1 <- .x[1]
    g2 <- .x[2]

    d <- km_surv_ttt %>% filter(group %in% c(g1, g2))
    sd <- survdiff(Surv(TTT) ~ group, data = d)

    tibble(
      group1 = g1,
      group2 = g2,
      pval   = pchisq(sd$chisq, df = 1, lower.tail = FALSE)
    )
  })

#format p-values
pairwise_df_os$pval <- format_pval(pairwise_df_os$pval)

####

#combine
Fig7f <- (Fig7d1+plot_spacer()+Fig7d2) + plot_layout(guides = 'collect', 
                                                     widths = c(1, 0.1, 1)) & 
  theme(legend.position='right', plot.margin = margin(t=0.5, l=0.5, unit = "cm")) &
  guides(color = guide_legend(ncol = 1))
Fig7f

#Fig7 multipanel
row2 <- plot_grid(Fig7f, NULL, ncol = 2, rel_widths = c(0.8, 0.2), 
                            labels=c("E", ""), label_size=14)

col1row1 <- plot_grid(Fig7a, Fig7b,
                  align = "v", axis = c("l", "r"), ncol = 1, 
                  labels=c("A", "B"), label_size=14)
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`sta_corr()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
col2row1 <- plot_grid(Fig7d, Fig7e, NULL,
                  align = "v", axis = c("l", "r"), ncol = 1, rel_heights = c(0.4, 0.3, 0.2), 
                  labels=c("C", "D", ""), label_size=14)

row1 <- plot_grid(col1row1, col2row1, rel_widths = c(0.7, 0.3))

Fig7 <- plot_grid(row1, row2, NULL, nrow=3, rel_heights = c(0.45, 0.15, 0.4)) & theme(plot.margin=margin(r=1,l=1,t=1,b=1,unit="cm"))
                                    
ggsave(path=(paste0(filepath,"figures")), filename = "Fig7.svg", width=42, height=59.4, units = "cm")
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave(path=(paste0(filepath,"figures")), filename = "Fig7.pdf", width=42, height=59.4, units = "cm")
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## ggrepel: 1 unlabeled data points (too many overlaps). Consider increasing max.overlaps
ggsave(path=(paste0(filepath,"figures")), filename = "Fig7.png", width=42, height=59.4, units = "cm", dpi=600)
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## ggrepel: 1 unlabeled data points (too many overlaps). Consider increasing max.overlaps

11.4 Modeling clinical outcomes by drug response in CLL

#prepare data for CLL treated with ibrutinib and fludarabine-containing regimens
timeline_cll <- timeline |> 
  left_join(screen_long[,c("patientID", "diagnosis")], by="patientID") |> 
  filter(diagnosis == "CLL") |> 
  select(-diagnosis) |> 
  distinct() |> 
  arrange(patientID)
## Warning in left_join(timeline, screen_long[, c("patientID", "diagnosis")], : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 80011 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
#nr of individual patients
paste0("number of patients: ",length(unique(timeline_cll$patientID)))
## [1] "number of patients: 130"
#treatments
#select patients with ibrutinib treatment (all therapy lines)
ibr <- timeline_cll |> 
  filter(regimen %in% c("Ibr")) |> 
  rename(ibr_start=start, ibr_end=end) |> 
  group_by(patientID) |> 
  arrange(patientID, ibr_start) |> 
  mutate(ibr_line = seq(1,n()))

#nr of individual ibr patients
paste0("number of patients treated with ibrutinib: ",length(unique(ibr$patientID)))
## [1] "number of patients treated with ibrutinib: 25"
ibr_tx_patID <- unique(ibr$patientID)

#calculate time to treatment (TTNT) for each ibrutinib line
ibr_start <- timeline_cll |> 
  filter(regimen %in% c("Ibr")) |> 
  dplyr::select(patientID, regimen, ibr_start = start, ibr_end=end) |> 
  rename(ibr_regimen = regimen)

#filter for treatments after ibrutinib and calculate ttnt
ibr_ttnt <- timeline_cll %>%
  filter(patientID %in% ibr_tx_patID) |> 
  merge(ibr_start, by = "patientID") |>
  group_by(patientID, ibr_start) |> 
  filter(start > ibr_start) |> #all treatments starting after ibr
  mutate(ttnt = as.numeric(difftime(as.POSIXct(start), as.POSIXct(ibr_start), units = "days"))) |> 
  arrange(start) |> 
  slice_head(n=1) |> #select next treatment after ibr
  ungroup() |> 
  dplyr::select(patientID, ibr_start, ttnt)

#combine dataframes
ibr <- merge(ibr, ibr_ttnt, by=c("patientID", "ibr_start"), all.x = TRUE)

#add survival data
df_surv <- survival |> 
  dplyr::select(patID, sampleDate, LKA, died, lastUpdate) |> 
  group_by(patID) |> 
  arrange(sampleDate) |> 
  slice_tail(n=1) |> 
  dplyr::select(-sampleDate)

ibr_surv <- merge(ibr, df_surv, by.x="patientID", by.y="patID", all.x = TRUE) |> 
  mutate(next_tx = ifelse(
    is.na(ttnt), FALSE, TRUE),
    ttnt = ttnt/365,
    ttnt = ifelse(
      next_tx==TRUE, ttnt, as.numeric(difftime(as.POSIXct(LKA), as.POSIXct(ibr_start), units = "days"))/365)
  ) |> 
  relocate(next_tx, .after = "ttnt")

#select untreated samples before ibrutinib start (closest to ibr start)
samples_ibr_tx <- screen_allSamples |> 
  filter(patientID %in% ibr_tx_patID) |> 
  arrange(patientID) |> 
  dplyr::select (patientID, sampleID) |> 
  left_join(survival[, colnames(survival) %in% c("sampleID", "sampleDate")],
            by = "sampleID") |> 
  merge(ibr_surv[,c("patientID", "ibr_line", "ibr_start")], by="patientID") |> 
  mutate(diff_d_ibr = as.numeric(difftime(as.POSIXct(sampleDate), as.POSIXct(ibr_start), units = "days")))

ibr_pat_treatments <- timeline_cll |> 
  filter(patientID %in% ibr_tx_patID) |> 
  merge(ibr_surv[,c("patientID", "ibr_start")], by="patientID")

samples_ibr_tx2 <- samples_ibr_tx %>%
  dplyr::select(-ibr_start) |> 
  left_join(ibr_pat_treatments, by = c("patientID")) %>%
  mutate(during_treatment = sampleDate >= start & sampleDate <= end) |> 
  arrange(patientID, diff_d_ibr)
## Warning in left_join(samples_ibr_tx %>% dplyr::select(-ibr_start), ibr_pat_treatments, : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 1 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
#filter for samples taken before ibr, without treatment
samples_ibr_pre <- samples_ibr_tx2 |> 
  group_by(patientID, sampleID, sampleDate, diff_d_ibr) |>
  mutate(during_treatment_any = ifelse(sum(as.numeric(during_treatment) >0), TRUE, FALSE)) |> 
  filter(diff_d_ibr < 0,during_treatment_any == FALSE) |> #untreated samples before ibr
  dplyr::select(patientID, sampleID, sampleDate, ibr_line, diff_d_ibr, ibr_start, during_treatment) |> 
  distinct() |> 
  arrange(patientID, desc(diff_d_ibr)) |> 
  filter(ibr_line == 1) |>  #filter for first time ibr
  group_by(patientID) |> 
  slice_head(n=1)

ibr_tx_patID_pre <- unique(samples_ibr_pre$patientID)  

#combine with viability data: during ibr treatment
ibr_pre <- samples_ibr_pre[,c("patientID", "sampleID", "sampleDate")]

#merge the tables
merged_pre <- left_join(ibr_pre, screen_allSamples, by=c("patientID", "sampleID")) |>
  pivot_longer(cols = -c(patientID, sampleID, sampleDate), names_to = "drug", values_to = "viability") |> 
  mutate(conc_index = as.numeric(str_extract(drug, "\\d+$")), drug = str_replace(drug, "_\\d+$", "")) |> 
  left_join(drugs |> 
              dplyr::select(Drug, Group), by = c("drug" = "Drug")) |> 
  rowwise() |> 
  mutate(concentration = drug_list[[Group]][conc_index]) |> 
  ungroup()

#create mean viabilities and combine with genetic features and survival
viabTab.ibr_pre <- merged_pre |> 
  group_by(patientID, sampleDate, drug) |> 
  arrange(patientID, drug) |>
  mutate(viab_mean = mean(viability)) |> 
  ungroup() |> 
  merge(metadata[,c("Patient.ID", "IGHV.status", "del17p", "TP53")], by.x="patientID", by.y="Patient.ID") |> 
  merge(ibr_surv, by="patientID") |> 
  filter(!is.na(ttnt), ibr_line == 1) |> 
  mutate(date_diff_d = as.numeric(difftime(as.POSIXct(ibr_start), as.POSIXct(sampleDate), units = "days")))

#filter samples during ibrutinib treatment
samples_ibr_post <- samples_ibr_tx2 |> 
  group_by(patientID, sampleID, sampleDate, diff_d_ibr) |>
  mutate(during_treatment_any = ifelse(sum(as.numeric(during_treatment) >0), TRUE, FALSE)) |> 
  filter(diff_d_ibr >0, regimen %in% c("Ibr"), during_treatment_any == TRUE) |>
  dplyr::select(patientID, sampleID, sampleDate, diff_d_ibr, ibr_start, during_treatment) |> 
  distinct(sampleDate) |> 
  arrange(patientID, sampleID, desc(diff_d_ibr)) |> 
  group_by(patientID) |> 
  slice_head(n=1)

ibr_tx_patID_post <- unique(samples_ibr_post$patientID)

#combine with viability data: during ibr treatment
ibr_post <- samples_ibr_post[,c("patientID", "sampleID", "sampleDate")]

#merge the tables
merged_post <- left_join(ibr_post, screen_allSamples, by=c("patientID", "sampleID")) |>
  pivot_longer(cols = -c(patientID, sampleID, sampleDate), names_to = "drug", values_to = "viability") |> 
  mutate(conc_index = as.numeric(str_extract(drug, "\\d+$")), drug = str_replace(drug, "_\\d+$", "")) |> 
  left_join(drugs |> 
              dplyr::select(Drug, Group), by = c("drug" = "Drug")) |> 
  rowwise() |> 
  mutate(concentration = drug_list[[Group]][conc_index]) |> 
  ungroup()

#create mean viabilities and combine with genetic features and survival
viabTab.ibr_post <- merged_post |> 
  group_by(patientID, sampleDate, drug) |> 
  arrange(patientID, drug) |>
  mutate(viab_mean = mean(viability)) |> 
  ungroup() |> 
  left_join(metadata[,c("Patient.ID", "IGHV.status", "del17p", "TP53")], by=c("patientID" = "Patient.ID")) |> 
  merge(ibr_surv, by="patientID") |> 
  filter(!is.na(ttnt), ibr_line == 1) |> 
  mutate(date_diff_d = as.numeric(difftime(as.POSIXct(ibr_start), as.POSIXct(sampleDate), units = "days")))

#univariate analysis for all drugs
ibr_drugs_surv <- viabTab.ibr_post |> 
  group_by(patientID, drug, ttnt, next_tx) |> 
  summarize(mean_viab = mean(viability)) |> 
  pivot_wider(names_from = "drug", values_from = "mean_viab") |> 
  as.tibble()
## `summarise()` has grouped output by 'patientID', 'drug', 'ttnt'. You can
## override using the `.groups` argument.
resTTNT_uni_all <- data.frame()

for (drug in drugs$Drug) {
    res <- filter(ibr_drugs_surv, !is.na(ttnt)) %>%
      do(com_uni(.[[drug]], .$ttnt, .$next_tx, TRUE))
    
    resTTNT_uni_row <- data.frame(
      Drug = drug,
      cox = res
    )
    resTTNT_uni_all <- rbind(resTTNT_uni_all, resTTNT_uni_row)
}

resTTNT_uni_all <- resTTNT_uni_all |> 
  arrange(drug, cox.p) |> 
  arrange(cox.p) %>% 
  mutate(p.adj = p.adjust(cox.p, method = "BH"))

#plot
#order by c-index
drug_order <- resTTNT_uni_all |> 
  arrange(desc(cox.concordance)) |> 
  pull(Drug)

resTTNT_uni_all$Drug <- factor(resTTNT_uni_all$Drug, levels = drug_order)

cox_ibr <- resTTNT_uni_all |> 
  filter(cox.concordance > 0.75 | Drug %in% c("Ibrutinib", "ONO-4059")) |> 
  ggplot(aes(x=Drug, y=cox.concordance))+
  geom_hline(yintercept=0.5, color="grey", linetype="dashed")+
  geom_hline(yintercept=0.75, color="grey", linetype="dashed")+
  geom_point()+
  geom_errorbar(aes(ymin=cox.concordance-cox.concordance_se, ymax=cox.concordance+cox.concordance_se), width = 0.2)+
  theme_figures_angle45_x+
  labs(title="Univariate Cox TTNT:\nDrug response during ibrutinib treatment", y="C-index", x="Drug")+
  scale_y_continuous(limits = c(0,1.1), breaks=c(0.25,0.5,0.75,1))

FigS10g <- cox_ibr + theme(plot.margin = margin(t=1, l=1, r=1, b=2, unit = "cm"))


#plot ibrutinib and TW-37
ibr_pre <- viabTab.ibr_post |> 
  filter(drug %in% c("Ibrutinib" , "TW-37")) |> 
  ggplot(aes(x=factor(concentration), y=viability, color=next_tx))+
  geom_boxplot(aes(group=interaction(concentration, next_tx)), width = 0.5, outliers=FALSE, position = position_dodge(width = 0.75))+
  geom_jitter(aes(group=interaction(concentration, next_tx), shape=IGHV.status), size=1, alpha = 0.75, 
              position = position_jitterdodge(jitter.width=0.1, dodge.width = 0.75))+
  labs(title="", y="Viability", x=bquote("Concentration ("*mu*"M)"), color = "Next treatment\nduring follow-up")+
  theme_figures_facet_angle45_x_legend+
  scale_y_continuous(breaks=seq(0.2,1.0, 0.2), limits=c(0.2,1.2)) +
  stat_compare_means(method = "t.test", label="p.signif", size=3, label.y = 1.15)+
  scale_shape_discrete(name="IGHV", labels=c("unmutated", "mutated"))+
  guides(color = guide_legend(override.aes = list(label = ""))) +
  facet_wrap(~drug , scales="free_x")

FigS10h <- ibr_pre + theme(plot.margin = margin(t=1, l=1, r=1, b=1, unit = "cm"))
#plot effect of single drugs: fludarabine (samples prior the treatment)
#patients with flu tx, select last fludarabine monotherapy
flu <- timeline_cll |> 
  filter(regimen %in% c("F", "FC", "FCR", "BR")) |> 
  dplyr::select(-end) |> 
  group_by(patientID) |> 
  arrange(patientID, start) |> 
  slice_tail(n=1) |> 
  rename(flu_start=start)

#nr of individual flu patients
flu_tx_patID <- unique(flu$patientID)

#calculate TTNT
flu_start <- timeline_cll |> 
  filter(regimen %in% c("F", "FC", "FCR", "BR")) |> 
  dplyr::select(patientID, flu_start = start) |> 
  group_by(patientID) |> 
  arrange(patientID, flu_start) |> 
  slice_tail(n=1)

#filter treatments after fludarabine and calculate ttnt
flu_ttnt <- timeline_cll %>%
  filter(patientID %in% flu_tx_patID) |> 
  merge(flu_start, by = "patientID") |> 
  filter(start > flu_start) |> 
  mutate(ttnt = as.numeric(difftime(as.POSIXct(start), as.POSIXct(flu_start), units = "days"))) |> 
  group_by(patientID) |> 
  arrange(start) |> 
  slice_tail(n=1) |> 
  dplyr::select(patientID, ttnt)

#combine dataframes
flu <- merge(flu, flu_ttnt, by="patientID", all.x = TRUE)

flu_surv <- merge(flu, df_surv, by.x="patientID", by.y="patID", all.x = TRUE) |> 
  mutate(next_tx = ifelse(
    is.na(ttnt), FALSE, TRUE),
    ttnt = ttnt/365,
    ttnt = ifelse(
      next_tx==TRUE, ttnt, as.numeric(difftime(as.POSIXct(LKA), as.POSIXct(flu_start), units = "days"))/365)
  ) |> 
  relocate(next_tx, .after = "ttnt")

#samples
#select untreated samples before flu start, select sample closest to flu start (???)
samples_flu_tx <- screen_allSamples |> 
  filter(patientID %in% flu_tx_patID) |> 
  arrange(patientID) |> 
  dplyr::select (patientID, sampleID) |> 
  left_join(survival[, colnames(survival) %in% c("sampleID", "sampleDate")],
            by = "sampleID") |> 
  merge(flu_surv[,c("patientID", "flu_start")], by="patientID") |> 
  mutate(diff_d_flu = as.numeric(difftime(as.POSIXct(flu_start), as.POSIXct(sampleDate), units = "days"))) |> 
  filter(!is.na(sampleDate))

flu_pat_treatments <- timeline_cll |> 
  filter(patientID %in% flu_tx_patID) |> 
  merge(flu_surv[,c("patientID", "flu_start")], by="patientID")

samples_flu_tx2 <- samples_flu_tx %>%
  dplyr::select(-flu_start) |> 
  left_join(flu_pat_treatments, by = c("patientID")) %>%
  mutate(during_treatment = sampleDate >= start & sampleDate <= end) |> 
  arrange(patientID, diff_d_flu)
## Warning in left_join(samples_flu_tx %>% dplyr::select(-flu_start), flu_pat_treatments, : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 7 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
#filter for samples without treatment, include samples taken after treatment
samples_flu_tx3 <- samples_flu_tx2 |> 
  group_by(patientID, sampleID, sampleDate, diff_d_flu) |>
  mutate(during_treatment_any = ifelse(sum(as.numeric(during_treatment) >0), TRUE, FALSE)) |> 
  filter(
    diff_d_flu >= 0, 
    during_treatment_any == FALSE) |> 
  dplyr::select(patientID, sampleID, sampleDate, diff_d_flu, flu_start, during_treatment) |> 
  distinct() |> 
  arrange(patientID, diff_d_flu) |> 
  group_by(patientID) |> 
  slice_head(n=1)

#combine with viability data: during ibr treatment
flu_pre <- samples_flu_tx3[,c("patientID", "sampleID", "sampleDate")]

#merge the tables
flu_merged_pre <- left_join(flu_pre, screen_allSamples, by=c("patientID", "sampleID")) |>
  pivot_longer(cols = -c(patientID, sampleID, sampleDate), names_to = "drug", values_to = "viability") |> 
  mutate(conc_index = as.numeric(str_extract(drug, "\\d+$")), drug = str_replace(drug, "_\\d+$", "")) |> 
  filter(drug == "Fludarabine") |> 
  left_join(drugs |> 
              dplyr::select(Drug, Group), by = c("drug" = "Drug")) |> 
  rowwise() |> 
  mutate(concentration = drug_list[[Group]][conc_index]) |> 
  ungroup()

  
#fit the dose-response model for fludarabine
conc_seq <- data.frame(conc = seq(min(flu_merged_pre$concentration), max(flu_merged_pre$concentration), length.out = 500))
predictions_list <- list()

for (pat in unique(flu_merged_pre$patientID)) {
  
  # Fit model    
  model_flu <- drm(viability ~ concentration, data = flu_merged_pre[flu_merged_pre$patientID==pat,], 
                   fct = LL.4(names = c("hill", "min_value", "max_value", "ec_50")))
  
  # Create predictions
  new_data_flu <- data.frame(concentration = conc_seq$conc)  # Note: use 'concentration' to match your model
  new_data_flu$viability <- predict(model_flu, newdata = new_data_flu)
  new_data_flu$patientID <- pat
  
  # Store in list
  predictions_list[[pat]] <- new_data_flu
}
  
#combine all predictions
all_predictions_flu <- bind_rows(predictions_list)

  
#plot drug response curves
flu_response <- all_predictions_flu |> 
  left_join(flu_surv, by="patientID") |> 
  filter(!is.na(next_tx)) |> 
  ggplot(aes(x=concentration, y=viability, color=next_tx, group=patientID)) +
  geom_line()+
  theme_figures+
  labs(title = "Fludarabine", y="Viability", x=bquote("Concentration ("*mu*"M)"), color = "Next treatment\nduring follow-up")+
  scale_y_continuous(breaks=seq(0.0,1.2, 0.2), limits=c(0.0,1.2))+
  scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x, n=3), labels = trans_format("log10", math_format(10^.x)), limits = c(0.016, 10))

FigS10e <- flu_response + theme(plot.margin = margin(t=1, l=1, r=1, b=1, unit = "cm"))


#fit sigmoid curve for each sample
fit_flu <- flu_merged_pre |> 
    dplyr::rename(Drug = drug, viab = viability, conc = concentration) |> 
    dplyr::select(patientID, sampleID, Drug, conc, viab)

ic50Tab_flu <- fit_flu %>%
    group_by(patientID, Drug) %>% nest() %>%
    mutate(mm = map(data, calcParm)) %>%
    unnest(mm) %>%
    dplyr::select(-data) %>% ungroup()
## Fit failed for a group: Lapack routine dgesv: system is exactly singular: U[1,1] = 0
## Fit failed for a group: Lapack routine dgesv: system is exactly singular: U[1,1] = 0
## Fit failed for a group: Lapack routine dgesv: system is exactly singular: U[1,1] = 0
## Fit failed for a group: Lapack routine dgesv: system is exactly singular: U[1,1] = 0
## Warning: There were 164 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `mm = map(data, calcParm)`.
## ℹ In group 1: `patientID = "P0005"` `Drug = "Fludarabine"`.
## Caused by warning in `optim()`:
## ! method L-BFGS-B uses 'factr' (and 'pgtol') instead of 'reltol' and 'abstol'
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 163 remaining warnings.
#combine with outcomes
ic50Tab_flu  <- merge(ic50Tab_flu, flu_surv, by="patientID")
  
#univariate cox regression
parameters <- colnames(ic50Tab_flu[,c(3:8)])
parameters <- append(parameters, c("viab.mean"))
drugs_select <- c("Fludarabine")
  
resTTNT_uni_flu <- data.frame()
  
  for (par in parameters) {
    for (drug in drugs_select) {
      res <- filter(ic50Tab_flu, !is.na(HS)) %>%
        do(com_uni(.[[par]], .$ttnt, .$next_tx, TRUE))
      
      resTTNT_uni_row <- data.frame(
        parameter = par,
        cox = res,
        Drug = drug
      )
      resTTNT_uni_flu <- rbind(resTTNT_uni_flu, resTTNT_uni_row)
      
    }
  }
## Warning in mean.default(response, na.rm = TRUE): argument is not numeric or
## logical: returning NA
resTTNT_uni_flu <- resTTNT_uni_flu |> 
  arrange(Drug, cox.p)

#plot
cox_flu <- resTTNT_uni_flu |> 
  filter(parameter %in% c("Einf", "HS", "IC50", "R2", "AUC", "viab.mean")) |> 
  ggplot(aes(x=parameter, y=cox.concordance))+
  geom_hline(yintercept=0.5, color="grey", linetype="dashed")+
  geom_hline(yintercept=0.75, color="grey", linetype="dashed")+
  geom_point()+
  geom_errorbar(aes(ymin=cox.concordance-cox.concordance_se, ymax=cox.concordance+cox.concordance_se), width = 0.2)+
  theme_figures_angle45_x+
  labs(title="Univariate Cox TTNT:\nFludarabine drug-response metrics", y="C-index", x="Parameter")+
  scale_y_continuous(limits=c(0,1.1), breaks=c(0,0.25,0.5,0.75,1))+
  scale_x_discrete(labels = c("AUC",
    expression(E[inf]),   # subscript 
    "Hill coefficient", 
    expression(IC[50]),  # subscript
    expression(R^2),    # superscript
    "Mean viability"))

FigS10f <- cox_flu + theme(plot.margin = margin(t=1, l=1, r=1, b=2, unit = "cm"))

11.5 Modeling clinical outcome by drug response in AML

#select for cytarabine in AML
aml_pat <- screen_long |> 
  filter(diagnosis == "AML") |>
  distinct(patientID) |> 
  pull(patientID)

#for AraC: nr of patients with chemo, select samples before chemo
patAML <- survival |> 
  filter(patID %in% aml_pat) |> 
  group_by(patID) |> 
  arrange(sampleDate) |> 
  slice_head(n=1) |> 
  mutate(sampleBeforeTx = ifelse(sampleDate <= firstTreatDate, TRUE, FALSE), .before ="firstTreatDate") |> 
  filter(sampleBeforeTx == TRUE) |> 
  rename(patientID = patID)

#combine with viability data, filter for cytarabine
aml_merged <- merge(patAML, screen_long, by=c("patientID", "sampleID")) |> 
  filter(drug == "Cytarabine") |> 
  group_by(patientID, sampleID, drug) |> 
  mutate(viab.mean = mean(viability))


#fit sigmoid curve for each each sample
fit_cyt <- aml_merged |> 
  dplyr::rename(Drug = drug, viab = viability, conc = concentration) |> 
  dplyr::select(patientID, sampleID, Drug, conc, viab)

ic50Tab_cyt <- fit_cyt %>%
  group_by(patientID, Drug) %>% nest() %>%
  mutate(mm = map(data, calcParm)) %>%
  unnest(mm) %>%
  dplyr::select(-data) %>% ungroup()
## Fit failed for a group: Lapack routine dgesv: system is exactly singular: U[1,1] = 0
## Warning: There were 117 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `mm = map(data, calcParm)`.
## ℹ In group 1: `patientID = "P1167"` `Drug = "Cytarabine"`.
## Caused by warning in `optim()`:
## ! method L-BFGS-B uses 'factr' (and 'pgtol') instead of 'reltol' and 'abstol'
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 116 remaining warnings.
#combine with survival data
aml_surv_viab <- merge(aml_merged, ic50Tab_cyt, by.x=c("patientID", "drug"), by.y=c("patientID", "Drug"))

#cox regression
drugs_select <- c("Cytarabine")

resOS_uni_aml <- data.frame()

for (par in parameters) {
  for (drug in drugs_select) {
    res <- filter(aml_surv_viab, !is.na(HS), !is.na(Einf), !is.na(IC50), !is.na(pF), !is.na(R2), !is.na(AUC)) %>%
      do(com_uni(.[[par]], .$OS, .$died, TRUE))
    
    resOS_uni_row <- data.frame(
      parameter = par,
      cox = res,
      Drug = drug
    )
    resOS_uni_aml <- rbind(resOS_uni_aml, resOS_uni_row)
    
  }
}
resOS_uni_aml <- resOS_uni_aml |> 
  arrange(Drug, cox.p)

#plot
cox_cyt <- resOS_uni_aml |> 
  filter(!(parameter %in%c("pF"))) |> 
  ggplot(aes(y=cox.HR, x=parameter))+
  geom_hline(yintercept = 1, linetype = "dotted") +
  geom_point(position = position_dodge(width=0.75), color="#cb181d") +
  geom_errorbar(position = position_dodge(width =0.75), 
                aes(ymin = cox.lower, ymax = cox.higher), width = 0.3, color="#cb181d") + 
  geom_text(position = position_dodge(width =0.75), size = 3,
            aes(y = cox.higher + 0.5, 
                label = ifelse(cox.p < 0.001, 
                               "italic(P)<0.001", 
                               paste0("italic(P)==", sprintf("%1.3f", cox.p)))),
            parse = TRUE, color="#cb181d")+
  theme_figures+
  coord_flip() +
  labs(title="Univariate Cox OS:\nCytarabine drug-response metrics", y="Hazard ratio", x="")+
  scale_x_discrete(labels = c(
    "AUC",
    expression(E[inf]),
    "Hill coefficient", 
    expression(IC[50]),
    expression(R^2),
    "Mean viability"))+
  guides(color = guide_legend(override.aes = aes(label = "")))+
  ylim(0,3)

FigS10i <- cox_cyt + theme(plot.margin = margin(t=1, r = 1, b=2, unit = "cm"))
FigS10i

#kaplan-meier
aml_km <- aml_merged |> 
  group_by(patientID, OS, died) |> 
  summarize(viab.mean = mean(viability)) |> 
  drop_na(OS)
## `summarise()` has grouped output by 'patientID', 'OS'. You can override using
## the `.groups` argument.
#maxstat
#OS
res.cut <- surv_cutpoint(aml_km, time = "OS", event = "died",
                         variables = "viab.mean")

summary(res.cut)
##           cutpoint statistic
## viab.mean    0.837      2.44
os_value <- summary(res.cut)$cutpoint
plot(res.cut, "viab.mean")
## $viab.mean

res.cat <- surv_categorize(res.cut)
head(res.cat)
##      OS  died viab.mean
## 1 0.674  TRUE       low
## 2 0.333  TRUE       low
## 3 1.342  TRUE       low
## 4 0.586  TRUE       low
## 5 0.370  TRUE      high
## 6 7.137 FALSE       low
fit <- survfit(Surv(OS, died) ~ viab.mean, data = res.cat)

aml_km <- aml_km |> 
  mutate(cyt_resistance_os = ifelse(viab.mean < os_value, "Low mean viability", "High mean viability")
  )

os <- survfit2(Surv(OS, died) ~ cyt_resistance_os, data = aml_km)

pval_os <- sub("^p=", "", survfit2_p(os))

aml_os <- os |>
  ggsurvfit() +
  labs(x = "Years", y = "Overall survival", title = 
         "Cytarabine") + 
  add_censor_mark(size = 1, alpha = 0.5) +
  scale_ggsurvfit() +
  labs(x="Years")+
  theme_figures+
  theme(legend.position = "bottom")+
  annotate(
    "text",
    x = 3,
    y = 0.75,
    label = paste0("Log-rank~italic(P)~`=`~", pval_os),
    parse = TRUE,
    hjust = 1,
    vjust = 0,
    size = 3
  )+
  scale_x_continuous(limits=c(0,3))+
  scale_color_manual(values = c("#CA054D", "#B96D40"))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
FigS10j <- aml_os + theme(plot.margin = margin(t=1, l = 1, b=0.5, unit = "cm"))
FigS10j
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_step()`).

12 Session info

sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.5
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Zurich
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    grid      stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ggraph_2.2.2                     tidygraph_1.3.1                 
##  [3] readxl_1.4.5                     data.table_1.17.8               
##  [5] org.Hs.eg.db_3.20.0              AnnotationDbi_1.68.0            
##  [7] biomaRt_2.62.1                   msigdbr_25.1.1                  
##  [9] fgsea_1.32.4                     apeglm_1.28.0                   
## [11] DESeq2_1.46.0                    SummarizedExperiment_1.36.0     
## [13] Biobase_2.66.0                   MatrixGenerics_1.18.1           
## [15] matrixStats_1.5.0                GenomicRanges_1.58.0            
## [17] GenomeInfoDb_1.42.3              IRanges_2.40.1                  
## [19] S4Vectors_0.44.0                 BiocGenerics_0.52.0             
## [21] statebins_1.4.0                  gtable_0.3.6                    
## [23] BloodCancerMultiOmics2017_1.26.0 ggsurvfit_1.2.0                 
## [25] draw_1.0.0                       gridGraphics_0.5-1              
## [27] scales_1.4.0                     plotrix_3.8-4                   
## [29] ggh4x_0.3.1                      ggtext_0.1.2                    
## [31] paletteer_1.6.0                  Rtsne_0.17                      
## [33] magrittr_2.0.4                   ggplotify_0.1.3                 
## [35] circlize_0.4.16                  ComplexHeatmap_2.22.0           
## [37] flextable_0.9.10                 writexl_1.5.4                   
## [39] corrplot_0.95                    glmnet_4.1-10                   
## [41] Matrix_1.7-4                     patchwork_1.3.2                 
## [43] maxstat_0.7-26                   survival_3.8-3                  
## [45] survminer_0.5.1                  ggpubr_0.6.2                    
## [47] dplyr_1.1.4                      lubridate_1.9.4                 
## [49] forcats_1.0.1                    stringr_1.6.0                   
## [51] purrr_1.2.0                      readr_2.1.5                     
## [53] tidyr_1.3.1                      tibble_3.3.0                    
## [55] tidyverse_2.0.0                  drc_3.0-1                       
## [57] MASS_7.3-65                      gridExtra_2.3                   
## [59] cowplot_1.2.0                    ggrepel_0.9.6                   
## [61] RColorBrewer_1.1-3               ggpmisc_0.6.2                   
## [63] ggpp_0.5.9                       ggplot2_4.0.1                   
## [65] knitr_1.50                       BiocStyle_2.34.0                
## 
## loaded via a namespace (and not attached):
##   [1] fs_1.6.6                devtools_2.4.6          httr_1.4.7             
##   [4] doParallel_1.0.17       numDeriv_2016.8-1.1     tools_4.4.2            
##   [7] backports_1.5.0         utf8_1.2.6              R6_2.6.1               
##  [10] mgcv_1.9-4              GetoptLong_1.0.5        litedown_0.8           
##  [13] withr_3.0.2             prettyunits_1.2.0       quantreg_6.1           
##  [16] cli_3.6.5               textshaping_1.0.4       exactRankTests_0.8-35  
##  [19] officer_0.7.0           sandwich_3.1-1          labeling_0.4.3         
##  [22] sass_0.4.10             mvtnorm_1.3-3           survMisc_0.5.6         
##  [25] S7_0.2.0                genefilter_1.88.0       askpass_1.2.1          
##  [28] commonmark_2.0.0        systemfonts_1.3.1       yulab.utils_0.2.1      
##  [31] svglite_2.2.2           sessioninfo_1.2.3       bbmle_1.0.25.1         
##  [34] rstudioapi_0.17.1       RSQLite_2.4.4           generics_0.1.4         
##  [37] shape_1.4.6.1           gtools_3.9.5            car_3.1-3              
##  [40] zip_2.3.3               abind_1.4-8             lifecycle_1.0.4        
##  [43] multcomp_1.4-29         yaml_2.3.10             carData_3.0-5          
##  [46] BiocFileCache_2.14.0    SparseArray_1.6.2       blob_1.2.4             
##  [49] crayon_1.5.3            bdsmatrix_1.3-7         lattice_0.22-7         
##  [52] annotate_1.84.0         KEGGREST_1.46.0         magick_2.9.0           
##  [55] pillar_1.11.1           rjson_0.2.23            codetools_0.2-20       
##  [58] fastmatch_1.1-6         glue_1.8.0              fontLiberation_0.1.0   
##  [61] remotes_2.5.0           vctrs_0.6.5             png_0.1-8              
##  [64] cellranger_1.1.0        assertthat_0.2.1        rematch2_2.1.2         
##  [67] emdbook_1.3.14          cachem_1.1.0            xfun_0.54              
##  [70] S4Arrays_1.6.0          coda_0.19-4.1           iterators_1.0.14       
##  [73] tinytex_0.58            KMsurv_0.1-6            ellipsis_0.3.2         
##  [76] TH.data_1.1-4           nlme_3.1-168            usethis_3.2.1          
##  [79] bit64_4.6.0-1           fontquiver_0.2.1        filelock_1.0.3         
##  [82] progress_1.2.3          bslib_0.9.0             colorspace_2.1-2       
##  [85] DBI_1.2.3               tidyselect_1.2.1        bit_4.6.0              
##  [88] compiler_4.4.2          curl_7.0.0              httr2_1.2.1            
##  [91] SparseM_1.84-2          xml2_1.4.1              ggdendro_0.2.0         
##  [94] fontBitstreamVera_0.1.1 DelayedArray_0.32.0     bookdown_0.45          
##  [97] rappdirs_0.3.3          digest_0.6.38           rmarkdown_2.30         
## [100] XVector_0.46.0          htmltools_0.5.8.1       pkgconfig_2.0.3        
## [103] dbplyr_2.5.1            fastmap_1.2.0           rlang_1.1.6            
## [106] GlobalOptions_0.1.2     UCSC.utils_1.2.0        farver_2.1.2           
## [109] jquerylib_0.1.4         zoo_1.8-14              jsonlite_2.0.0         
## [112] BiocParallel_1.40.2     polynom_1.4-1           Formula_1.2-5          
## [115] GenomeInfoDbData_1.2.13 Rcpp_1.1.0              viridis_0.6.5          
## [118] babelgene_22.9          gdtools_0.4.4           stringi_1.8.7          
## [121] zlibbioc_1.52.0         plyr_1.8.9              pkgbuild_1.4.8         
## [124] parallel_4.4.2          graphlayouts_1.2.2      Biostrings_2.74.1      
## [127] splines_4.4.2           gridtext_0.1.5          hms_1.1.4              
## [130] locfit_1.5-9.12         igraph_2.2.1            uuid_1.2-1             
## [133] markdown_2.0            ggsignif_0.6.4          reshape2_1.4.5         
## [136] pkgload_1.4.1           XML_3.99-0.20           evaluate_1.0.5         
## [139] ipflasso_1.1            BiocManager_1.30.26     tweenr_2.0.3           
## [142] tzdb_0.5.0              foreach_1.5.2           MatrixModels_0.5-4     
## [145] polyclip_1.10-7         openssl_2.3.4           km.ci_0.5-6            
## [148] clue_0.3-66             ggforce_0.5.0           broom_1.0.10           
## [151] xtable_1.8-4            rstatix_0.7.3           viridisLite_0.4.2      
## [154] ragg_1.5.0              mltools_0.3.5           memoise_2.0.1          
## [157] beeswarm_0.4.0          cluster_2.1.8.1         timechange_0.3.0